本文主要是介绍分析项目需求后的ods层和dwd层建表和插入语句(全量脚本),希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
ods层
主要在ods干的事,就是根据需求将所需要的表的数据从mysql中导入到ods层中,所以需要在hive中建立ods层的表,并且在导入的时候需要根据需求过滤一下空数据,并在建表时加入一个dt字段记录导入的时间
ods层建表语句
1,edu_ods层建立ods_customer_relationship
drop table edu_ods.ods_customer_relationship;
CREATE TABLE edu_ods.ods_customer_relationship (`id` bigint ,`create_date_time` string ,`update_date_time` string COMMENT '最后更新时间',`deleted` bigint COMMENT '是否被删除(禁用)',`customer_id` bigint COMMENT '所属客户id',`first_id` bigint COMMENT '第一条客户关系id',`belonger` bigint COMMENT '归属人',`belonger_name` string COMMENT '归属人姓名',`initial_belonger` bigint COMMENT '初始归属人',`distribution_handler` bigint COMMENT '分配处理人',`business_scrm_department_id` bigint COMMENT '归属部门',`last_visit_time` string COMMENT '最后回访时间',`next_visit_time` string COMMENT '下次回访时间',`origin_type` string COMMENT '数据来源',`itcast_school_id` bigint COMMENT '校区Id',`itcast_subject_id` bigint COMMENT '学科Id',`intention_study_type` string COMMENT '意向学习方式',`anticipat_signup_date` string COMMENT '预计报名时间',`level` string COMMENT '客户级别',`creator` bigint COMMENT '创建人',`current_creator` bigint COMMENT '当前创建人:初始==创建人,当在公海拉回时为 拉回人',`creator_name` string COMMENT '创建者姓名',`origin_channel` string COMMENT '来源渠道',`comment` string COMMENT '备注',`first_customer_clue_id` bigint COMMENT '第一条线索id',`last_customer_clue_id` bigint COMMENT '最后一条线索id',`process_state` string COMMENT '处理状态',`process_time` string COMMENT '处理状态变动时间',`payment_state` string COMMENT '支付状态',`payment_time` string COMMENT '支付状态变动时间',`signup_state` string COMMENT '报名状态',`signup_time` string COMMENT '报名时间',`notice_state` string COMMENT '通知状态',`notice_time` string COMMENT '通知状态变动时间',`lock_state` bigint COMMENT '锁定状态',`lock_time` string COMMENT '锁定状态修改时间',`itcast_clazz_id` bigint COMMENT '所属ems班级id',`itcast_clazz_time` string COMMENT '报班时间',`payment_url` string COMMENT '付款链接',`payment_url_time` string COMMENT '支付链接生成时间',`ems_student_id` bigint COMMENT 'ems的学生id',`delete_reason` string COMMENT '删除原因',`deleter` bigint COMMENT '删除人',`deleter_name` string COMMENT '删除人姓名',`delete_time` string COMMENT '删除时间',`course_id` bigint COMMENT '课程ID',`course_name` string COMMENT '课程名称',`delete_comment` string COMMENT '删除原因说明',`close_state` string COMMENT '关闭装填',`close_time` string COMMENT '关闭状态变动时间',`appeal_id` bigint COMMENT '申诉id',`tenant` bigint COMMENT '租户',`total_fee` decimal(19,0) COMMENT '报名费总金额',`belonged` bigint COMMENT '小周期归属人',`belonged_time` string COMMENT '归属时间',`belonger_time` string COMMENT '归属时间',`transfer` bigint COMMENT '转移人',`transfer_time` string COMMENT '转移时间',`follow_type` bigint COMMENT '分配类型,0-自动分配,1-手动分配,2-自动转移,3-手动单个转移,4-手动批量转移,5-公海领取',`transfer_bxg_oa_account` string COMMENT '转移到博学谷归属人OA账号',`transfer_bxg_belonger_name` string COMMENT '转移到博学谷归属人OA姓名'
) COMMENT '客户意向表'
partitioned by (dt string)
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='ZLIB');2,edu_ods层建立ods_customer_clue
drop table edu_ods.`ods_customer_clue`;
CREATE TABLE edu_ods.`ods_customer_clue` (`id` bigint ,`create_date_time` string COMMENT '创建时间',`update_date_time` string COMMENT '最后更新时间',`deleted` bigint COMMENT '是否被删除(禁用)',`customer_id` bigint COMMENT '客户id',`customer_relationship_id` bigint COMMENT '客户关系id',`session_id` string COMMENT '七陌会话id',`sid` string COMMENT '访客id',`status` string COMMENT '状态(undeal待领取 deal 已领取 finish 已关闭 changePeer 已流转)',`user` string COMMENT '所属坐席',`create_time` string COMMENT '七陌创建时间',`platform` string COMMENT '平台来源 (pc-网站咨询|wap-wap咨询|sdk-app咨询|weixin-微信咨询)',`s_name` string COMMENT '用户名称',`seo_source` string COMMENT '搜索来源',`seo_keywords` string COMMENT '关键字',`ip` string COMMENT 'IP地址',`referrer` string COMMENT '上级来源页面',`from_url` string COMMENT '会话来源页面',`landing_page_url` string COMMENT '访客着陆页面',`url_title` string COMMENT '咨询页面title',`to_peer` string COMMENT '所属技能组',`manual_time` string COMMENT '人工开始时间',`begin_time` string COMMENT '坐席领取时间 ',`reply_msg_count` bigint COMMENT '客服回复消息数',`total_msg_count` bigint COMMENT '消息总数',`msg_count` bigint COMMENT '客户发送消息数',`comment` string COMMENT '备注',`finish_reason` string COMMENT '结束类型',`finish_user` string COMMENT '结束坐席',`end_time` string COMMENT '会话结束时间',`platform_description` string COMMENT '客户平台信息',`browser_name` string COMMENT '浏览器名称',`os_info` string COMMENT '系统名称',`area` string COMMENT '区域',`country` string COMMENT '所在国家',`province` string COMMENT '省',`city` string COMMENT '城市',`creator` bigint COMMENT '创建人',`name` string COMMENT '客户姓名',`idcard` string COMMENT '身份证号',`phone` string COMMENT '手机号',`itcast_school_id` bigint COMMENT '校区Id',`itcast_school` string COMMENT '校区',`itcast_subject_id` bigint COMMENT '学科Id',`itcast_subject` string COMMENT '学科',`wechat` string COMMENT '微信',`qq` string COMMENT 'qq号',`email` string COMMENT '邮箱',`gender` string COMMENT '性别',`level` string COMMENT '客户级别',`origin_type` string COMMENT '数据来源渠道',`information_way` string COMMENT '资讯方式',`working_years` string COMMENT '开始工作时间',`technical_directions` string COMMENT '技术方向',`customer_state` string COMMENT '当前客户状态',`valid` bigint COMMENT '该线索是否是网资有效线索',`anticipat_signup_date` string COMMENT '预计报名时间',`clue_state` string COMMENT '线索状态',`scrm_department_id` bigint COMMENT 'SCRM内部部门id',`superior_url` string COMMENT '诸葛获取上级页面URL',`superior_source` string COMMENT '诸葛获取上级页面URL标题',`landing_url` string COMMENT '诸葛获取着陆页面URL',`landing_source` string COMMENT '诸葛获取着陆页面URL来源',`info_url` string COMMENT '诸葛获取留咨页URL',`info_source` string COMMENT '诸葛获取留咨页URL标题',`origin_channel` string COMMENT '投放渠道',`course_id` bigint,`course_name` string ,`zhuge_session_id` string,`is_repeat` bigint COMMENT '是否重复线索(手机号维度) 0:正常 1:重复',`tenant` bigint COMMENT '租户id',`activity_id` string COMMENT '活动id',`activity_name` string COMMENT '活动名称',`follow_type` bigint COMMENT '分配类型,0-自动分配,1-手动分配,2-自动转移,3-手动单个转移,4-手动批量转移,5-公海领取',`shunt_mode_id` bigint COMMENT '匹配到的技能组id',`shunt_employee_group_id` bigint COMMENT '所属分流员工组'
) COMMENT '客户线索表'
partitioned by (dt string)
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='ZLIB');3,,edu_ods层建立ods_employee
drop table edu_ods.ods_employee;
create table ods_employee
(id bigint ,email string comment '公司邮箱,OA登录账号',real_name string comment '员工的真实姓名',phone string comment '手机号,目前还没有使用;隐私问题OA接口没有提供这个属性,',department_id string comment 'OA中的部门编号,有负值',department_name string comment 'OA中的部门名',remote_login bigint comment '员工是否可以远程登录',job_number string comment '员工工号',cross_school bigint comment '是否有跨校区权限',last_login_date string comment '最后登录日期',creator bigint comment '创建人',create_date_time string comment '创建时间',update_date_time string comment '最后更新时间',deleted bigint comment '是否被删除(禁用)',scrm_department_id bigint comment 'SCRM内部部门id',leave_office bigint comment '离职状态',leave_office_time string comment '离职时间',reinstated_time string comment '复职时间',superior_leaders_id bigint comment '上级领导ID',tdepart_id bigint comment '直属部门',tenant bigint,ems_user_name string
)comment '员工信息表'
partitioned by (dt string)
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='ZLIB');4,,edu_ods层建立ods_scrm_department
drop table edu_ods.`ods_scrm_department`;
CREATE TABLE edu_ods.`ods_scrm_department` (`id` bigint COMMENT '部门id',`name` string COMMENT '部门名称',`parent_id` bigint COMMENT '父部门id',`create_date_time` string COMMENT '创建时间',`update_date_time` string COMMENT '更新时间',`deleted` bigint COMMENT '删除标志',`id_path` string COMMENT '编码全路径',`tdepart_code` bigint COMMENT '直属部门',`creator` string COMMENT '创建者',`depart_level` bigint COMMENT '部门层级',`depart_sign` bigint COMMENT '部门标志,暂时默认1',`depart_line` bigint COMMENT '业务线,存储业务线编码',`depart_sort` bigint COMMENT '排序字段',`disable_flag` bigint COMMENT '禁用标志',`tenant` bigint
) comment '部门表'
partitioned by (dt string)
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='ZLIB');5,edu_ods层建立ods_customer_appeal
drop table edu_ods.`ods_customer_appeal`;
CREATE TABLE edu_ods.`ods_customer_appeal` (`id` bigint COMMENT '主键',`customer_relationship_first_id` bigint COMMENT '第一条客户关系id',`employee_id` bigint COMMENT '申诉人',`employee_name` string COMMENT '申诉人姓名',`employee_department_id` bigint COMMENT '申诉人部门',`employee_tdepart_id` bigint COMMENT '申诉人所属部门',`appeal_status` bigint COMMENT '申诉状态,0:待稽核 1:无效 2:有效',`audit_id` bigint COMMENT '稽核人id',`audit_name` string COMMENT '稽核人姓名',`audit_department_id` bigint COMMENT '稽核人所在部门',`audit_department_name` string COMMENT '稽核人部门名称',`audit_date_time` string COMMENT '稽核时间',`create_date_time` string COMMENT '创建时间(申诉时间)',`update_date_time` string COMMENT '更新时间',`deleted` bigint COMMENT '删除标志位',`tenant` bigint
) comment '线索申诉表'
partitioned by (dt string)
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='ZLIB');6,edu_ods层建立ods_itcast_clazz
CREATE TABLE edu_ods.`ods_itcast_clazz` (`id` string COMMENT 'ems课程id(非自增)',`create_date_time` string COMMENT '创建时间',`update_date_time` string COMMENT '最后更新时间',`deleted` string COMMENT '是否被删除(禁用)',`itcast_school_id` string COMMENT 'ems校区ID',`itcast_school_name` string COMMENT 'ems校区名称',`itcast_subject_id` string COMMENT 'ems学科ID',`itcast_subject_name` string COMMENT 'ems学科名称',`itcast_brand` string COMMENT 'ems品牌',`clazz_type_state` string COMMENT '班级类型状态',`clazz_type_name` string COMMENT '班级类型名称',`teaching_mode` string COMMENT '授课模式',`start_time` string COMMENT '开班时间',`end_time` string COMMENT '毕业时间',`comment` string COMMENT '备注',`detail` string COMMENT '详情(比如:27期)',`uncertain` string COMMENT '待定班(0:否,1:是)',`tenant` string )comment '报名课程表'
partitioned by (dt string)row format delimited fields terminated by '\t'
stored as orc tblproperties ('orc.compress'='ZLIB');
数据从mysql导入ods层语句
将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的customer
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
--username root \
--password 123456 \
--query "SELECT * ,${dateStr} as dt FROM edu_scrm.customer_${tableMonth}
WHERE wce.create_time BETWEEN '${dateStr} 00:00:00' AND '${dateStr} 23:59:59' and \$CONDITIONS" \
--hcatalog-database edu_ods \
--hcatalog-table ods_customer \
-m 1将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的customer_appeal
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from customer_appeal where 1=1 and $CONDITIONS' \
--hcatalog-database edu_ods \
--hcatalog-table ods_customer_appeal \
--m 1;将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的customer_clue
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from customer_clue where 1=1 and $CONDITIONS' \
--hcatalog-database edu_ods \
--hcatalog-table ods_customer_clue \
--m 1;将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的customer_relationship
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from customer_relationship where 1=1 and $CONDITIONS' \
--hcatalog-database edu_ods \
--hcatalog-table ods_customer_relationship \
--m 1;将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的employee
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from employee where 1=1 and $CONDITIONS' \
--hcatalog-database edu_ods \
--hcatalog-table ods_employee \
--m 1;将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的itcast_clazz
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from itcast_clazz where 1=1 and $CONDITIONS' \
--hcatalog-database edu_ods \
--hcatalog-table ods_itcast_clazz \
--m 1;将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的itcast_school
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from itcast_school where 1=1 and $CONDITIONS' \
--hcatalog-database edu_ods \
--hcatalog-table ods_itcast_school \
--m 1;将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的itcast_subject
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from itcast_subject where 1=1 and $CONDITIONS' \
--hcatalog-database edu_ods \
--hcatalog-table ods_itcast_subject\
--m 1;将数据从mysql中导入到hive中的edu_ods 数据库下的scrm_department
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from scrm_department where 1=1 and $CONDITIONS' \
--hcatalog-database edu_ods \
--hcatalog-table ods_scrm_department \
--m 1;
全量脚本将数据从mysql导入hive的ods层的全量脚本
export SQOOP_HOME=/usr/bin/sqoopdateStr=`date -d '-1 day' +'%Y-%m-%d'` ###获取昨天的日期,用于增量导入
tableMonth=`date -d '-1 day' +'%Y_%m'` ###获取当年的年月,用于导入数据的业务数据库的表名称jdbcUrl='jdbc:mysql://106.75.33.59:3306/edu_scrm?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true'
username='itcast_edu_stu'
password='itcast_edu_stu'### 将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的customer_appeal
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--query 'select *, "2023-12-07" as dt from customer_appeal where 1=1 and $CONDITIONS' and dt=dateStr \
--hcatalog-database edu_ods \
--hcatalog-table ods_customer_appeal \
--m 1;### 将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的customer_clue
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--query 'select *, "2023-12-07" as dt from customer_clue where 1=1 and $CONDITIONS' \
--hcatalog-database edu_ods \
--hcatalog-table ods_customer_clue \
--m 1;### 将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的customer_relationship
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--query 'select *, "2023-12-07" as dt from customer_relationship where 1=1 and $CONDITIONS' \
--hcatalog-database edu_ods \
--hcatalog-table ods_customer_relationship \
--m 1;### 将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的employee
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--query 'select *, "2023-12-07" as dt from employee where 1=1 and $CONDITIONS' \
--hcatalog-database edu_ods \
--hcatalog-table ods_employee \
--m 1;### 将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的itcast_clazz
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--query 'select *, "2023-12-07" as dt from itcast_clazz where 1=1 and $CONDITIONS' \
--hcatalog-database edu_ods \
--hcatalog-table ods_itcast_clazz \
--m 1;### 将数据从mysql中导入到hive中的edu_ods 数据库下的scrm_department
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--query 'select *, "2023-12-07" as dt from scrm_department where 1=1 and $CONDITIONS' \
--hcatalog-database edu_ods \
--hcatalog-table ods_scrm_department \
--m 1;
dwd层维度表事实表拉链表建表语句
1,创建DWD层的表dim_customer_appealdrop table edu_dwd.dim_customer_appeal;
CREATE TABLE edu_dwd.`dim_customer_appeal` (`id` int,`customer_relationship_first_id` int COMMENT '第一条客户关系id',`employee_id` int COMMENT '申诉人',`employee_name` string COMMENT '申诉人姓名',`employee_department_id` int COMMENT '申诉人部门',`employee_tdepart_id` int COMMENT '申诉人所属部门',`appeal_status` int COMMENT '申诉状态,0:待稽核 1:无效 2:有效',`audit_id` int COMMENT '稽核人id',`audit_name` string COMMENT '稽核人姓名',`audit_department_id` int COMMENT '稽核人所在部门',`audit_department_name` string COMMENT '稽核人部门名称',`audit_date_time` string COMMENT '稽核时间',`create_date_time` string COMMENT '创建时间(申诉时间)',`update_date_time` string COMMENT '更新时间',`deleted` int COMMENT '删除标志位',`tenant` int ,
`end_date` string COMMENT '失效日期',start_date string COMMENT '开始日期') comment '线索申诉维度拉链表'partitioned by (dt string)
clustered by (customer_relationship_first_id) into 6 bucketsrow format delimited fields terminated by '\t'
stored as orc tblproperties ('orc.compress'='SNAPPY')relationship,clue,appeal,itcast_clazz,employee,department2,创建DWD层的表dim_itcast_clazz
drop table edu_dwd.dim_itcast_clazz;
CREATE TABLE edu_dwd.`dim_itcast_clazz` (`id` int COMMENT 'ems课程id(非自增)',`create_date_time` string COMMENT '创建时间',`update_date_time` string COMMENT '最后更新时间',`deleted` int COMMENT '是否被删除(禁用)',`itcast_school_id` string COMMENT 'ems校区ID',`itcast_school_name` string COMMENT 'ems校区名称',`itcast_subject_id` string COMMENT 'ems学科ID',`itcast_subject_name` string COMMENT 'ems学科名称',`itcast_brand` string COMMENT 'ems品牌',`clazz_type_state` string COMMENT '班级类型状态',`clazz_type_name` string COMMENT '班级类型名称',`teaching_mode` string COMMENT '授课模式',`start_time` string COMMENT '开班时间',`end_time` string COMMENT '毕业时间',`comment` string COMMENT '备注',`detail` string COMMENT '详情(比如:27期)',`uncertain` int COMMENT '待定班(0:否,1:是)',`tenant` int ,`end_date` string COMMENT '失效日期',start_date string COMMENT '开始日期',dt string )comment '报名课程表'
partitioned by (year string,month string,day string)
clustered by (id) into 3 bucketsrow format delimited fields terminated by '\t'
stored as orc tblproperties ('orc.compress'='SNAPPY');3,创建DWD层的表dim_scrm_departmentCREATE TABLE edu_dwd.`dim_scrm_department` (`id` bigint COMMENT '部门id',`name` string COMMENT '部门名称',`parent_id` bigint COMMENT '父部门id',`create_date_time` string COMMENT '创建时间',`update_date_time` string COMMENT '更新时间',`deleted` bigint COMMENT '删除标志',`id_path` string COMMENT '编码全路径',`tdepart_code` bigint COMMENT '直属部门',`creator` string COMMENT '创建者',`depart_level` bigint COMMENT '部门层级',`depart_sign` bigint COMMENT '部门标志,暂时默认1',`depart_line` bigint COMMENT '业务线,存储业务线编码',`depart_sort` bigint COMMENT '排序字段',`disable_flag` bigint COMMENT '禁用标志',`tenant` bigint,dt string
) comment '部门表'
partitioned by (yeat string,month string ,day string)
clustered by (id) into 3 buckets
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='SNAPPY');4,创建了DWD层的表edu_dwd.dim_employee
create table edu_dwd.dim_employee
(id bigint ,email string comment '公司邮箱,OA登录账号',real_name string comment '员工的真实姓名',phone string comment '手机号,目前还没有使用;隐私问题OA接口没有提供这个属性,',department_id string comment 'OA中的部门编号,有负值',department_name string comment 'OA中的部门名',remote_login bigint comment '员工是否可以远程登录',job_number string comment '员工工号',cross_school bigint comment '是否有跨校区权限',last_login_date string comment '最后登录日期',creator bigint comment '创建人',create_date_time string comment '创建时间',update_date_time string comment '最后更新时间',deleted bigint comment '是否被删除(禁用)',scrm_department_id bigint comment 'SCRM内部部门id',leave_office bigint comment '离职状态',leave_office_time string comment '离职时间',reinstated_time string comment '复职时间',superior_leaders_id bigint comment '上级领导ID',tdepart_id bigint comment '直属部门',tenant bigint,ems_user_name string
)comment '员工信息表'
partitioned by (dt string)
clustered by (id) into 3 buckets
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='SNAPPY');5,创建DWD层的表ods_customer_clue
drop table edu_dwd.ods_customer_clue;
CREATE TABLE edu_dwd.ods_customer_clue (`id` bigint ,`create_date_time` string COMMENT '创建时间',`update_date_time` string COMMENT '最后更新时间',`deleted` bigint COMMENT '是否被删除(禁用)',`customer_id` bigint COMMENT '客户id',`customer_relationship_id` bigint COMMENT '客户关系id',`session_id` string COMMENT '七陌会话id',`sid` string COMMENT '访客id',`status` string COMMENT '状态(undeal待领取 deal 已领取 finish 已关闭 changePeer 已流转)',`user` string COMMENT '所属坐席',`create_time` string COMMENT '七陌创建时间',`platform` string COMMENT '平台来源 (pc-网站咨询|wap-wap咨询|sdk-app咨询|weixin-微信咨询)',`s_name` string COMMENT '用户名称',`seo_source` string COMMENT '搜索来源',`seo_keywords` string COMMENT '关键字',`ip` string COMMENT 'IP地址',`referrer` string COMMENT '上级来源页面',`from_url` string COMMENT '会话来源页面',`landing_page_url` string COMMENT '访客着陆页面',`url_title` string COMMENT '咨询页面title',`to_peer` string COMMENT '所属技能组',`manual_time` string COMMENT '人工开始时间',`begin_time` string COMMENT '坐席领取时间 ',`reply_msg_count` bigint COMMENT '客服回复消息数',`total_msg_count` bigint COMMENT '消息总数',`msg_count` bigint COMMENT '客户发送消息数',`comment` string COMMENT '备注',`finish_reason` string COMMENT '结束类型',`finish_user` string COMMENT '结束坐席',`end_time` string COMMENT '会话结束时间',`platform_description` string COMMENT '客户平台信息',`browser_name` string COMMENT '浏览器名称',`os_info` string COMMENT '系统名称',`area` string COMMENT '区域',`country` string COMMENT '所在国家',`province` string COMMENT '省',`city` string COMMENT '城市',`creator` bigint COMMENT '创建人',`name` string COMMENT '客户姓名',`idcard` string COMMENT '身份证号',`phone` string COMMENT '手机号',`itcast_school_id` bigint COMMENT '校区Id',`itcast_school` string COMMENT '校区',`itcast_subject_id` bigint COMMENT '学科Id',`itcast_subject` string COMMENT '学科',`wechat` string COMMENT '微信',`qq` string COMMENT 'qq号',`email` string COMMENT '邮箱',`gender` string COMMENT '性别',`level` string COMMENT '客户级别',`origin_type` string COMMENT '数据来源渠道',`information_way` string COMMENT '资讯方式',`working_years` string COMMENT '开始工作时间',`technical_directions` string COMMENT '技术方向',`customer_state` string COMMENT '当前客户状态',`valid` bigint COMMENT '该线索是否是网资有效线索',`anticipat_signup_date` string COMMENT '预计报名时间',`clue_state` string COMMENT '线索状态',`scrm_department_id` bigint COMMENT 'SCRM内部部门id',`superior_url` string COMMENT '诸葛获取上级页面URL',`superior_source` string COMMENT '诸葛获取上级页面URL标题',`landing_url` string COMMENT '诸葛获取着陆页面URL',`landing_source` string COMMENT '诸葛获取着陆页面URL来源',`info_url` string COMMENT '诸葛获取留咨页URL',`info_source` string COMMENT '诸葛获取留咨页URL标题',`origin_channel` string COMMENT '投放渠道',`course_id` bigint,`course_name` string ,`zhuge_session_id` string,`is_repeat` bigint COMMENT '是否重复线索(手机号维度) 0:正常 1:重复',`tenant` bigint COMMENT '租户id',`activity_id` string COMMENT '活动id',`activity_name` string COMMENT '活动名称',`follow_type` bigint COMMENT '分配类型,0-自动分配,1-手动分配,2-自动转移,3-手动单个转移,4-手动批量转移,5-公海领取',`shunt_mode_id` bigint COMMENT '匹配到的技能组id',`shunt_employee_group_id` bigint COMMENT '所属分流员工组',`end_date` string COMMENT '失效日期',start_date string COMMENT '开始日期'dt string
) COMMENT '客户线索表'
partitioned by (year string,month string ,day string)
clustered by (customer_relationship_id) into 6 buckets
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='SNAPPY');6,创建DWD层的表fact_customer_relationship
drop table edu_ods.ods_customer_relationship;
CREATE TABLE edu_dwd.fact_customer_relationship(`id` bigint ,`create_date_time` string ,`update_date_time` string COMMENT '最后更新时间',`deleted` bigint COMMENT '是否被删除(禁用)',`customer_id` bigint COMMENT '所属客户id',`first_id` bigint COMMENT '第一条客户关系id',`belonger` bigint COMMENT '归属人',`belonger_name` string COMMENT '归属人姓名',`initial_belonger` bigint COMMENT '初始归属人',`distribution_handler` bigint COMMENT '分配处理人',`business_scrm_department_id` bigint COMMENT '归属部门',`last_visit_time` string COMMENT '最后回访时间',`next_visit_time` string COMMENT '下次回访时间',`origin_type` string COMMENT '数据来源',`itcast_school_id` bigint COMMENT '校区Id',`itcast_subject_id` bigint COMMENT '学科Id',`intention_study_type` string COMMENT '意向学习方式',`anticipat_signup_date` string COMMENT '预计报名时间',`level` string COMMENT '客户级别',`creator` bigint COMMENT '创建人',`current_creator` bigint COMMENT '当前创建人:初始==创建人,当在公海拉回时为 拉回人',`creator_name` string COMMENT '创建者姓名',`origin_channel` string COMMENT '来源渠道',`comment` string COMMENT '备注',`first_customer_clue_id` bigint COMMENT '第一条线索id',`last_customer_clue_id` bigint COMMENT '最后一条线索id',`process_state` string COMMENT '处理状态',`process_time` string COMMENT '处理状态变动时间',`payment_state` string COMMENT '支付状态',`payment_time` string COMMENT '支付状态变动时间',`signup_state` string COMMENT '报名状态',`signup_time` string COMMENT '报名时间',`notice_state` string COMMENT '通知状态',`notice_time` string COMMENT '通知状态变动时间',`lock_state` bigint COMMENT '锁定状态',`lock_time` string COMMENT '锁定状态修改时间',`itcast_clazz_id` bigint COMMENT '所属ems班级id',`itcast_clazz_time` string COMMENT '报班时间',`payment_url` string COMMENT '付款链接',`payment_url_time` string COMMENT '支付链接生成时间',`ems_student_id` bigint COMMENT 'ems的学生id',`delete_reason` string COMMENT '删除原因',`deleter` bigint COMMENT '删除人',`deleter_name` string COMMENT '删除人姓名',`delete_time` string COMMENT '删除时间',`course_id` bigint COMMENT '课程ID',`course_name` string COMMENT '课程名称',`delete_comment` string COMMENT '删除原因说明',`close_state` string COMMENT '关闭装填',`close_time` string COMMENT '关闭状态变动时间',`appeal_id` bigint COMMENT '申诉id',`tenant` bigint COMMENT '租户',`total_fee` decimal(19,0) COMMENT '报名费总金额',`belonged` bigint COMMENT '小周期归属人',`belonged_time` string COMMENT '归属时间',`belonger_time` string COMMENT '归属时间',`transfer` bigint COMMENT '转移人',`transfer_time` string COMMENT '转移时间',`follow_type` bigint COMMENT '分配类型,0-自动分配,1-手动分配,2-自动转移,3-手动单个转移,4-手动批量转移,5-公海领取',`transfer_bxg_oa_account` string COMMENT '转移到博学谷归属人OA账号',`transfer_bxg_belonger_name` string COMMENT '转移到博学谷归属人OA姓名'
) COMMENT '客户意向表'
partitioned by (year string,month string ,day string)
clustered by (id) into 3 buckets
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='SNAPPY');
数据从ods层导入dwd层语句
1,将ods层的数据插入dwd层的表dim_customer_appeal(拉链表)
insert overwrite table edu_dwd.dim_customer_appeal PARTITION (dt)
select `id` ,`customer_relationship_first_id` ,`employee_id` ,`employee_name` ,`employee_department_id`,`employee_tdepart_id`,`appeal_status`,`audit_id`,`audit_name`,`audit_department_id` ,`audit_department_name` ,`audit_date_time` ,`create_date_time`,`update_date_time`,`deleted`,`tenant` ,
'9999-99-99' end_date,
update_date_time as start_date,
dt
from edu_ods.ods_customer_appeal2,,将ods层的数据插入dwd层的表edu_dwd.`dim_itcast_clazz`(拉链表)
insert overwrite table edu_dwd.`dim_itcast_clazz` partition(year,month ,day)
select
id
,create_date_time
,update_date_time
,deleted
,itcast_school_id
,itcast_school_name
,itcast_subject_id
,itcast_subject_name
,itcast_brand
,clazz_type_state
,clazz_type_name
,teaching_mode
,start_time
,end_time
,comment
,detail
,uncertain
,tenant
,'9999-99-99' end_date
,update_date_time as start_date
,dt string
,year(create_date_time)
,month(create_date_time)
,day(create_date_time)
from
edu_ods.ods_itcast_clazz3,将ods层的数据插入dwd层的表dim_scrm_departmentinsert overwrite table edu_dwd.dim_scrm_department partition(year,month,day)
select
id
,name
,parent_id
,create_date_time
,update_date_time
,deleted
,id_path
,tdepart_code
,creator
,depart_level
,depart_sign
,depart_line
,depart_sort
,disable_flag
,tenant
,dt
,year(create_date_time) as year
,month(create_date_time) as month
,day(create_date_time) as day
from edu_ods.ods_scrm_department4,将ods层的数据插入dwd层的表edu_dwd.dim_employee
insert overwrite table edu_dwd.dim_employee partition(dt)
select
,id
,email
,real_name
,phone
,department_id
,department_name
,remote_login
,job_number
,cross_school
,last_login_date
,creator
,create_date_time
,update_date_time
,deleted
,scrm_department_id
,leave_office
,leave_office_time
,reinstated_time
,superior_leaders_id
,tdepart_id
,tenant
,ems_user_name
,dt
from deu_ods.ods_employee5,将ods层的数据插入dwd层的表clue(拉链表)
insert overwrite table edu_dwd.fact_customer_clue partition(year,month,day)
select
id
,create_date_time
,update_date_time
,deleted
,customer_id
,customer_relationship_id
,session_id
,sid
,status
,`user`
,create_time
,platform
,s_name
,seo_source
,seo_keywords
,ip
,referrer
,from_url
,landing_page_url
,url_title
,to_peer
,manual_time
,begin_time
,reply_msg_count
,total_msg_count
,msg_count
,comment
,finish_reason
,finish_user
,end_time
,platform_description
,browser_name
,os_info
,area
,country
,province
,city
,creator
,name
,idcard
,phone
,itcast_school_id
,itcast_school
,itcast_subject_id
,itcast_subject
,wechat
,qq
,email
,gender
,`level`
,origin_type
,information_way
,working_years
,technical_directions
,customer_state
,valid
,anticipat_signup_date
,clue_state
,scrm_department_id
,superior_url
,superior_source
,landing_url
,landing_source
,info_url
,info_source
,origin_channel
,course_id
,course_name
,zhuge_session_id
,is_repeat
,tenant
,activity_id
,activity_name
,follow_type
,shunt_mode_id
,shunt_employee_group_id
,'9999-99-99' end_date
,update_date_time as start_date,
dt,
year(create_date_time) as year,
month(create_date_time) as month,
day(create_date_time) as day
from edu_ods.ods_customer_clue6,将ods层的数据插入dwd层的表realtionship(拉链表)insert overwrite table edu_dwd.fact_customer_relationship partition(year,month,day)
select
id
,create_date_time
,update_date_time
,deleted
,customer_id
,first_id
,belonger
,belonger_name
,initial_belonger
,distribution_handler
,business_scrm_department_id
,last_visit_time
,next_visit_time
,origin_type
,itcast_school_id
,itcast_subject_id
,intention_study_type
,anticipat_signup_date
,level
,creator
,current_creator
,creator_name
,origin_channel
,comment
,first_customer_clue_id
,last_customer_clue_id
,process_state
,process_time
,payment_state
,payment_time
,signup_state
,signup_time
,notice_state
,notice_time
,lock_state
,lock_time
,itcast_clazz_id
,itcast_clazz_time
,payment_url
,payment_url_time
,ems_student_id
,delete_reason
,deleter
,deleter_name
,delete_time
,course_id
,course_name
,delete_comment
,close_state
,close_time
,appeal_id
,tenant
,total_fee
,belonged
,belonged_time
,belonger_time
,transfer
,transfer_time
,follow_type
,transfer_bxg_oa_account
,transfer_bxg_belonger_name
,'9999-99-99' end_date
,update_date_time as start_date,year(create_date_time) as year,month(create_date_time) as month,day(create_date_time) as dayfrom edu_ods.ods_customer_relationship
这篇关于分析项目需求后的ods层和dwd层建表和插入语句(全量脚本)的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!