(38)DWS层

2024-01-22 23:08
文章标签 dws 38

本文主要是介绍(38)DWS层,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!

1 每日设备行为
每日设备行为,主要按照设备 id 统计。

 

1 )建表语句
hive (gmall)>
drop table if exists dws_uv_detail_daycount;
create external table dws_uv_detail_daycount
(
`mid_id` string COMMENT ' 设备 id',
`brand` string COMMENT ' 手机品牌 ',
`model` string COMMENT ' 手机型号 ',
`login_count` bigint COMMENT ' 活跃次数 ',
`page_stats` array<struct<page_id:string,page_count:bigint>> COMMENT '
面访问统计 '
) COMMENT ' 每日设备行为表 '
partitioned by(dt string)
stored as parquet
location '/warehouse/gmall/dws/dws_uv_detail_daycount'
tblproperties ("parquet.compression"="lzo");
2 )数据装载
hive (gmall)>
with
tmp_start as
(
select
mid_id,
brand,
model,
count(*) login_count
from dwd_start_log
where dt='2020-06-14'
group by mid_id,brand,model
),
tmp_page as
(
select
mid_id,
brand,
model,
collect_set(named_struct('page_id',page_id,'page_count',page_count))
page_stats
from
(
select
mid_id,
brand,
model,
page_id,
count(*) page_count
from dwd_page_log
where dt='2020-06-14'
group by mid_id,brand,model, page_id
)tmp
group by mid_id,brand,model
)
insert overwrite table dws_uv_detail_daycount partition(dt='2020-06-14')
select
nvl(tmp_start.mid_id,tmp_page.mid_id),
nvl(tmp_start.brand,tmp_page.brand),
nvl(tmp_start.model,tmp_page.model),
tmp_start.login_count,
tmp_page.page_stats
from tmp_start
full outer join tmp_page
on tmp_start.mid_id=tmp_page.mid_id
and tmp_start.brand=tmp_page.brand
and tmp_start.model=tmp_page.model;
3 )查询加载结果
hive (gmall)>
select * from dws_uv_detail_daycount where dt='2020-06-14' limit 2;
2 . 每日会员行为
1 )建表语句
hive (gmall)>
drop table if exists dws_user_action_daycount;
create external table dws_user_action_daycount
(
user_id string comment ' 用户 id',
login_count bigint comment ' 登录次数 ',
cart_count bigint comment ' 加入购物车次数 ',
order_count bigint comment ' 下单次数 ',
order_amount decimal(16,2) comment ' 下单金额 ',
payment_count bigint comment ' 支付次数 ',
payment_amount decimal(16,2) comment ' 支付金额 ',
order_detail_stats
array<struct<sku_id:string,sku_num:bigint,order_count:bigint,order_amount:de
cimal(20,2)>> comment ' 下单明细统计 '
) COMMENT ' 每日会员行为 '
PARTITIONED BY (`dt` string)
stored as parquet
location '/warehouse/gmall/dws/dws_user_action_daycount/'
tblproperties ("parquet.compression"="lzo");
2 )数据装载

 

hive (gmall)>
with
tmp_login as
(
select
user_id,
count(*) login_count
from dwd_start_log
where dt='2020-06-14'
and user_id is not null
group by user_id
),
tmp_cart as
(
select
user_id,
count(*) cart_count
from dwd_action_log
where dt='2020-06-14'
and user_id is not null
and action_id='cart_add'
group by user_id
),tmp_order as
(
select
user_id,
count(*) order_count,
sum(final_total_amount) order_amount
from dwd_fact_order_info
where dt='2020-06-14'
group by user_id
) ,
tmp_payment as
(
select
user_id,
count(*) payment_count,
sum(payment_amount) payment_amount
from dwd_fact_payment_info
where dt='2020-06-14'
group by user_id
),
tmp_order_detail as
(
select
user_id,
collect_set(named_struct('sku_id',sku_id,'sku_num',sku_num,'order_count',ord
er_count,'order_amount',order_amount)) order_stats
from
(
select
user_id,
sku_id,
sum(sku_num) sku_num,
count(*) order_count,
cast(sum(final_amount_d) as decimal(20,2)) order_amount
from dwd_fact_order_detail
where dt='2020-06-14'
group by user_id,sku_id
)tmp
group by user_id
)
insert overwrite table dws_user_action_daycount partition(dt='2020-06-14')
select
tmp_login.user_id,
login_count,
nvl (cart_count,0),
nvl(order_count,0),
nvl(order_amount,0.0),
nvl(payment_count,0),
nvl(payment_amount,0.0),
order_stats
from tmp_login
left join tmp_cart on tmp_login.user_id=tmp_cart.user_id
left join tmp_order on tmp_login.user_id=tmp_order.user_id
left join tmp_payment on tmp_login.user_id=tmp_payment.user_id
left join tmp_order_detail on tmp_login.user_id=tmp_order_detail.user_id;
3 )查询加载结果
hive (gmall)>
select * from dws_user_action_daycount where dt='2020-06-14' limit 2;
3 每日商品行为
1 )建表语句
hive (gmall)>
drop table if exists dws_sku_action_daycount;
create external table dws_sku_action_daycount
(
sku_id string comment 'sku_id',
order_count bigint comment ' 被下单次数 ',
order_num bigint comment ' 被下单件数 ',
order_amount decimal(16,2) comment ' 被下单金额 ',
payment_count bigint comment ' 被支付次数 ',
payment_num bigint comment ' 被支付件数 ',
payment_amount decimal(16,2) comment ' 被支付金额 ',
refund_count bigint comment ' 被退款次数 ',
refund_num bigint comment ' 被退款件数 ',
refund_amount decimal(16,2) comment ' 被退款金额 ',
cart_count bigint comment ' 被加入购物车次数 ',
favor_count bigint comment ' 被收藏次数 ',
appraise_good_count bigint comment ' 好评数 ',
appraise_mid_count bigint comment ' 中评数 ',
appraise_bad_count bigint comment ' 差评数 ',
appraise_default_count bigint comment ' 默认评价数 '
) COMMENT ' 每日商品行为 '
PARTITIONED BY (`dt` string)
stored as parquet
location '/warehouse/gmall/dws/dws_sku_action_daycount/'
tblproperties ("parquet.compression"="lzo");
2 )数据装载
注意:如果是 23 59 下单,支付日期跨天。需要从订单详情里面取出支付时间是今天,且
订单时间是昨天或者今天的订单。
hive (gmall)>
with
tmp_order as
(
select
sku_id,
count(*) order_count,
sum(sku_num) order_num,
sum(final_amount_d) order_amount
from dwd_fact_order_detail
where dt='2020-06-14'
group by sku_id
),
tmp_payment as
(
select
sku_id,
count(*) payment_count,
sum(sku_num) payment_num,
sum(final_amount_d) payment_amount
from dwd_fact_order_detail
where (dt='2020-06-14'
or dt=date_add('2020-06-14',-1))
and order_id in
(
select
id
from dwd_fact_order_info
where (dt='2020-06-14'
or dt=date_add('2020-06-14',-1))
and date_format(payment_time,'yyyy-MM-dd')='2020-06-14'
)
group by sku_id
),
tmp_refund as
(
select
sku_id,
count(*) refund_count,
sum(refund_num) refund_num,
sum(refund_amount) refund_amount
from dwd_fact_order_refund_info
where dt='2020-06-14'
group by sku_id
),
tmp_cart as
(
select
item sku_id,
count(*) cart_count
from dwd_action_log
where dt='2020-06-14'
and user_id is not null
and action_id='cart_add'
group by item
),tmp_favor as
(
select
item sku_id,
count(*) favor_count
from dwd_action_log
where dt='2020-06-14'
and user_id is not null
and action_id='favor_add'
group by item
),
tmp_appraise as
(
select
sku_id,
sum(if(appraise='1201',1,0)) appraise_good_count,
sum(if(appraise='1202',1,0)) appraise_mid_count,
sum(if(appraise='1203',1,0)) appraise_bad_count,
sum(if(appraise='1204',1,0)) appraise_default_count
from dwd_fact_comment_info
where dt='2020-06-14'
group by sku_id
)
insert overwrite table dws_sku_action_daycount partition(dt='2020-06-14')
select
sku_id,
sum(order_count),
sum(order_num),
sum(order_amount),
sum(payment_count),
sum(payment_num),
sum(payment_amount),
sum(refund_count),
sum(refund_num),
sum(refund_amount),
sum(cart_count),
sum(favor_count),
sum(appraise_good_count),
sum(appraise_mid_count),
sum(appraise_bad_count),
sum(appraise_default_count)
from
(
select
sku_id,
order_count,
order_num,
order_amount,
0 payment_count,
0 payment_num,
0 payment_amount,
0 refund_count,
0 refund_num,
0 refund_amount,
0 cart_count,
0 favor_count,
0 appraise_good_count,
0 appraise_mid_count,
0 appraise_bad_count,
0 appraise_default_count
from tmp_order
union all
select
sku_id,
0 order_count,
0 order_num,
0 order_amount,
payment_count,
payment_num,
payment_amount,
0 refund_count,
0 refund_num,
0 refund_amount,
0 cart_count,
0 favor_count,
0 appraise_good_count,
0 appraise_mid_count,
0 appraise_bad_count,
0 appraise_default_count
from tmp_payment
union all
select
sku_id,
0 order_count,
0 order_num,
0 order_amount,
0 payment_count,
0 payment_num,
0 payment_amount,
refund_count,
refund_num,
refund_amount,
0 cart_count,
0 favor_count,
0 appraise_good_count,
0 appraise_mid_count,
0 appraise_bad_count,
0 appraise_default_count
from tmp_refund
union all
select
sku_id,
0 order_count,
0 order_num,
0 order_amount,
0 payment_count,
0 payment_num,
0 payment_amount,
0 refund_count,
0 refund_num,
0 refund_amount,
cart_count,
0 favor_count,
0 appraise_good_count,
0 appraise_mid_count,
0 appraise_bad_count,
0 appraise_default_count
from tmp_cart
union all
select
sku_id,
0 order_count,
0 order_num,
0 order_amount,
0 payment_count,
0 payment_num,
0 payment_amount,
0 refund_count,
0 refund_num,
0 refund_amount,
0 cart_count,
favor_count,
0 appraise_good_count,
0 appraise_mid_count,
0 appraise_bad_count,
0 appraise_default_count
from tmp_favor
union all
select
sku_id,
0 order_count,
0 order_num,
0 order_amount,
0 payment_count,
0 payment_num,
0 payment_amount,
0 refund_count,
0 refund_num,
0 refund_amount,
0 cart_count,
0 favor_count,
appraise_good_count,
appraise_mid_count,
appraise_bad_count,
appraise_default_count
from tmp_appraise
)tmp
group by sku_id;
3 )查询加载结果
hive (gmall)>
select * from dws_sku_action_daycount where dt='2020-06-14' limit 2;
(注:剩下主要是dws层,dwt,ADS层,表字段建立,及结合业务sql语句,内容主要结合实际业务情况,不做过多展示,主要是相关分层思路,在前期文档中也有相关介绍,建表导表过场相对枯燥,并且也对sql有一定要求,要夯实ssql基本功)

这篇关于(38)DWS层的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!



http://www.chinasem.cn/article/634532

相关文章

LLM系列 | 38:解读阿里开源语音多模态模型Qwen2-Audio

引言 模型概述 模型架构 训练方法 性能评估 实战演示 总结 引言 金山挂月窥禅径,沙鸟听经恋法门。 小伙伴们好,我是微信公众号《小窗幽记机器学习》的小编:卖铁观音的小男孩,今天这篇小作文主要是介绍阿里巴巴的语音多模态大模型Qwen2-Audio。近日,阿里巴巴Qwen团队发布了最新的大规模音频-语言模型Qwen2-Audio及其技术报告。该模型在音频理解和多模态交互

leetcode#38. Count and Say

The count-and-say sequence is the sequence of integers with the first five terms as following: 1. 12. 113. 214. 12115. 111221 1 is read off as “one 1” or 11. 11 is read off

2024国赛数学建模B题完整分析参考论文38页(含模型和可运行代码)

2024 高教社杯全国大学生数学建模完整分析参考论文 B 题 生产过程中的决策问题 目录 摘要 一、问题重述 二、问题分析 三、 模型假设 四、 模型建立与求解 4.1问题1 4.1.1问题1思路分析 4.1.2问题1模型建立 4.1.3问题1样例代码(仅供参考) 4.1.4问题1样例代码运行结果(仅供参考) 4.2问题2 4.2.1问题2思路分析 4.2.2问题2模

数据库系统 第38节 数据库备份

数据库备份是确保数据安全和完整性的重要手段。它涉及创建数据库的副本,以便在数据丢失或损坏的情况下可以恢复。数据库备份可以采取多种形式,包括完全备份、增量备份和差异备份。下面将详细说明每种备份类型,并提供相应的源代码示例。 1. 完全备份 完全备份是数据库的完整副本,包括所有数据和数据库对象(如表、索引、视图等)。这种备份通常用于初始备份或在长时间间隔后进行备份。 示例代码(Python):

LeetCode - 38. Count and Say

38. Count and Say  Problem's Link  ---------------------------------------------------------------------------- Mean:  题目意思太晦涩。 1 读出来 就是“1个1” 所以记为“11” 11 读出来 就是“2个1” 所以记为“21” 21 读出来 就是“1个

【软件逆向】第38课,软件逆向安全工程师之操作标志寄存器实例,每天5分钟学习逆向吧!

在这些实例学习中,我们使用汇编指令来操作标志寄存器,并根据标志寄存器的状态进行条件分支。这些操作对于编写高效的汇编程序以及理解程序的行为至关重要 实例 1:使用 PUSHF 和 POPF 保存和恢复标志寄存器状态 section .text global _start _start: ; 初始化 AL 寄存器 MOV AL, 0xFF ; 对 AL 寄存器进行加一操作,这将导致 AL 寄存器的

Java38: 数据库二(Oracle)

group by 分组 --每个工作的人数select count(job) from emp group by job;--有多少工作select count(coutn(job)) from emp group by job;--每个工作的最高工资select max(sal) from emp group by job;--每个工作的最低工资select min(sal

大数据-案例-离线数仓-在线教育:MySQL(业务数据)-ETL(Sqoop)->Hive数仓【ODS层-数据清洗->DW层(DWD-统计分析->DWS)】-导出(Sqoop)->MySQL->可视化

一、商业BI系统概述 商业智能系统,通常简称为商业智能系统,是商业智能软件的简称,是为提高企业经营绩效而采用的一系列方法、技术和软件的总和。通常被理解为将企业中的现有数据转换为知识并帮助企业做出明智的业务决策的工具。 BI系统中的数据来自企业的其他业务系统。例如,一个面向业务的企业,其业务智能系统数据包括业务系统订单、库存、交易账户、客户和供应商信息,以及企业所属行业和竞争对手的数据,以及其他

38集 ESP32 离线刷机教程

38集 ESP32 离线刷机教程 打开离线刷机工具 e:\technology\ESP32\esp32s3Qorvo\esp32-s3_rtc\ESP离线刷机\flash_download_tool_3.9.7_1\flash_download_tool_3.9.7\flash_download_tool_3.9.7.exe 设置如下: 要知道如何设置,需要知道esp32s3 工程的memor

云计算实训38——docker网络、跨主机容器之间的通讯

一、docker⽹络 1.桥接--bridge 所有容器连接到桥就可以使⽤外⽹,使⽤nat让容器可以访问外⽹ 使⽤ ip a s指令查看桥,所有容器连接到此桥,ip地址都是 172.17.0.0/16 ⽹段,桥是启动docker服务后出现,在centos使⽤ bridge-utils安装 1.下载bridge-utilsyum -y install bridge-utils.x86_