当前位置:网站首页>数仓建表111111
数仓建表111111
2022-04-23 02:10:00 【Listener6666】
一、dim层
1、商品信息表
(1)建表语句
drop table if EXISTS dim_sku_info_df;
CREATE TABLE if not EXISTS dim_sku_info_df(
sku_id string comment '库存单元id',
price double comment '价格',
sku_name string comment '库存单元名称',
sku_desc string comment '商品规格描述',
weight double comment '重量',
is_sale string comment '是否销售(1:是 0:否)',
spu_id string comment '商品id',
spu_name string comment '商品名称',
brand_id string comment '品牌id',
brand_name string comment '品牌名称',
is_used string comment '是否有效 1有效 0失效' ,
banned_time string comment '封禁时间',
is_domestic string comment '是否国内品牌 1是 0否',
logo_url string comment '品牌logo的图片路径',
class_name_3 string comment '三级分类名称',
class_id_3 string comment '三级分类ID',
class_name_2 string comment '二级分类名称',
class_id_2 string comment '二级分类ID',
class_name_1 string comment '一级分类名称',
class_id_1 string comment '一级分类ID',
create_time string comment '创建时间',
update_time string comment '修改时间'
)
comment '商品信息表'
PARTITIONED BY (
pt string comment '日期'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC tblproperties("orc.compress"="SNAPPY")
;
(2)导数语句
with sku AS
(SELECT
sku_id
,spu_id
,price
,sku_name
,sku_desc
,weight
,brand_id
,category3_id
,sku_img
,is_sale
,create_time
,update_time
,pt
from project.ods_sku_info_full where pt = '2022-01-01'),
spu as
(select
spu_id
,spu_name
from project.ods_spu_info_full where pt = '2022-01-01'),
brand as
(select
brand_id
,brand_name
,is_used
,banned_time
,is_domestic
,logo_url
from project.ods_pub_brand_full where pt = '2022-01-01'),
class as
(select
l3.class_id as class_id_3
,l3.class_name as class_name_3
,l2.class_id as class_id_2
,l2.class_name as class_name_2
,l1.class_id as class_id_1
,l1.class_name as class_name_1
from (select * from project.ods_pub_class_full where pt='2022-01-01' and `level`= '1') l1
left join (select * from project.ods_pub_class_full where pt='2022-01-01' and `level`='2') l2
on l1.class_id = l2.parent_id
left join (select * from project.ods_pub_class_full where pt='2022-01-01' and `level`='3') l3
on l2.class_id = l3.parent_id )
insert overwrite table project.dim_sku_info_df partition(pt='2022-01-01')
select
sku.sku_id,
sku.price,
sku.sku_name,
sku.sku_desc,
sku.weight,
sku.is_sale,
sku.spu_id,
spu.spu_name,
brand.brand_id,
brand.brand_name,
brand.is_used,
brand.banned_time,
brand.is_domestic,
brand.logo_url,
class.class_name_3,
class.class_id_3,
class.class_name_2,
class.class_id_2,
class.class_name_1,
class.class_id_1,
sku.create_time,
sku.update_time
from sku
left join spu
on sku.spu_id = spu.spu_id
left join brand
on sku.brand_id = brand.brand_id
left join class
on sku.category3_id = class.class_id_3 ;
2、商品详细属性表
(1)建表语句
drop table if EXISTS dim_sku_para_info_df;
CREATE TABLE if not EXISTS dim_sku_para_info_df(
id string comment 'id'
,sku_id string comment '库存单元id'
,spu_id string comment '商品id'
,para_id string comment 'sku参数id'
,attr_id string comment '属性id'
,attr_name string comment '属性名称'
,value string comment '属性值'
)
comment '商品详细属性表'
PARTITIONED BY (
pt string comment '日期'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC tblproperties("orc.compress"="SNAPPY")
;
(2)导数语句
with sp AS
(select
id
,sku_id
,spu_id
,para_id
from project.ods_sku_para_full where pt = '2022-01-01'),
spi as
(select
para_id
,attr_id
,attr_name
,value
from project.ods_sku_para_info_full where pt = '2022-01-01')
insert overwrite table dim_sku_para_info_df partition(pt='2022-01-01')
select
sp.id
,sp.sku_id
,sp.spu_id
,sp.para_id
,spi.attr_id
,spi.attr_name
,spi.value
from sp
left join spi
on sp.para_id = spi.para_id
;
3、活动信息表
(1)建表语句
drop table if exists dim_activity_info_df;
create external table dim_activity_info_df
( id string comment 'id',
activity_id string comment '活动id',
activity_name string comment '活动名称',
activity_type_code string comment '活动类型编码',
activity_type_name string comment '活动类型名称',
activity_desc string comment '活动描述',
activity_rules string comment '活动规则',
content_amount double comment '满足的金额',
reduce_amount int comment '活动减免的金额',
content_cnt double comment '满足的件数',
reduce_rate double comment '活动减免的折扣',
activity_level string comment '优惠等级',
start_time string comment '开始时间',
end_time string comment '结束时间',
create_time string comment '创建时间',
update_time string comment '修改时间'
) comment '活动信息表'
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;
(2)导数语句
with act as
(
select
id
,activity_id
,activity_name
,activity_type
,activity_desc
,case activity_type
when '3101' then concat('满',content_amount,'元减',reduce_amount,'元')
when '3102' then concat('满',content_cnt,'件打',10*(1-reduce_rate),'折')
end activity_rules
,content_amount
,reduce_amount
,content_cnt
,reduce_rate
,activity_level
,start_time
,end_time
,create_time
,update_time
from project.ods_activity_info_full where pt='2022-01-01'
),
code as
(
select
code_id
,code_name
from project.ods_pub_code_full where pt='2022-01-01'
)
insert overwrite table dim_activity_info_df partition(pt='2022-01-01')
select
id
,activity_id
,activity_name
,activity_type
,code.code_name
,activity_desc
,case activity_type
when '3101' then concat('满',content_amount,'元减',reduce_amount,'元')
when '3102' then concat('满',content_cnt,'件打',10*(1-reduce_rate),'折')
end activity_rules
,content_amount
,reduce_amount
,content_cnt
,reduce_rate
,activity_level
,start_time
,end_time
,create_time
,update_time
from act
left join code
on act.activity_type = code.code_id
;
4、优惠券信息表
(1)建表语句
drop table if exists dim_coupon_info_df;
create table dim_coupon_info_df
(
coupon_id string comment '购物券id'
,coupon_name string comment '购物券名称'
,coupon_type string comment '购物券类型'
,coupon_type_name string comment '购物券类型名称'
,content_amount double comment '满足的金额'
,reduce_amount double comment '优惠券减免的金额'
,range_type string comment '优惠券范围类型'
,class_id_3 string comment '三级分类id'
,start_time string comment '优惠券使用开始日期'
,end_time string comment '优惠券使用结束日期'
,create_time string comment '创建时间'
,update_time string comment '修改时间'
,etl_load_time string comment 'etl加载时间'
) comment '优惠券信息表'
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;
(2)导数语句
with coupon as
(
select
coupon_id
,coupon_name
,coupon_type
,content_amount
,reduce_amount
,range_type
,category3_id
,start_time
,end_time
,create_time
,update_time
,current_timestamp()
from project.ods_coupon_info_full
where pt='2022-01-01'
)
,
code as
(
select
code_id
,code_name
from project.ods_pub_code_full where pt='2022-01-01'
)
insert overwrite table dim_coupon_info_df partition(pt='2022-01-01')
select
coupon_id
,coupon_name
,coupon_type
,code_name
,content_amount
,reduce_amount
,range_type
,category3_id
,start_time
,end_time
,create_time
,update_time
,current_timestamp()
from coupon
left join code
on coupon.coupon_type = code.code_id
;
5、区域表
(1)建表语句
drop table if exists dim_pub_area_df;
create table dim_pub_area_df
(
province_id string comment 'id'
,province_name string comment '省市名称'
,area_code string comment '地区编码'
,iso_code_old string comment '旧版地区编码'
,iso_code_new string comment '新版地区编码'
,region_id string comment '地区id'
,region_name string comment '地区名称'
,etl_load_time string comment 'etl加载时间'
) comment '区域表'
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;
(2)导数语句
insert overwrite table dim_pub_area_df partition(pt='2022-01-01')
select
province_id
,province_name
,area_code
,iso_code
,iso_3166_2
,region_id
,region_name
,current_timestamp()
from project.ods_pub_area_full
where pt='2022-01-01'
6、商家信息表
(1)建表语句
drop table if exists dim_business_info_df;
create table dim_business_info_df
(
bus_id string comment '商家id'
,bus_name string comment '商家名称'
,telephone string comment '联系电话'
,pic_url string comment '图片地址URL'
,bus_user_name double comment '商家联系人'
,province double comment '省份'
,city string comment '城市'
,dist string comment '区'
,addr string comment '详细地址'
,zipcode string comment '邮编'
,is_open string comment '是否关店 1关店 0未关'
,open_time string comment '开店时间'
,closed_time string comment '关店时间'
,create_time string comment '创建时间'
,update_time string comment '修改时间'
,etl_load_time string comment 'etl加载时间'
) comment '优惠券信息表'
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;
(2)导数语句
insert overwrite table dim_business_info_df partition(pt='2022-01-01')
select
bus_id
,bus_name
,md5(telephone)
,pic_url
,md5(bus_user_name)
,province
,city
,dist
,addr
,zipcode
,is_open
,open_time
,closed_time
,create_time
,update_time
,current_timestamp()
from project.ods_business_info_full
where pt ='2022-01-01'
;
7、用户信息表(拉链表)
(1)建表语句
DROP TABLE IF EXISTS dim_user_info_his;
CREATE EXTERNAL TABLE dim_user_info_his
(
user_id STRING COMMENT '用户id'
,login_name STRING COMMENT '用户名称'
,nick_name STRING COMMENT '用户昵称'
,name STRING COMMENT '用户姓名'
,phone_num STRING COMMENT '手机号码'
,email STRING COMMENT '邮箱'
,head_img STRING COMMENT '头像地址'
,user_level STRING COMMENT '用户等级'
,birthday STRING COMMENT '生日'
,gender STRING COMMENT '性别'
,status STRING COMMENT '状态 1-有效,0-无效,2-黑名单'
,create_time STRING COMMENT '创建时间'
,update_time STRING COMMENT '操作时间'
,start_date STRING COMMENT '开始日期'
,end_date STRING COMMENT '结束日期'
) COMMENT '用户表'
PARTITIONED BY (pt STRING)
STORED AS ORC
TBLPROPERTIES ('orc.compress' = 'snappy');
(2)首日装载语句
insert overwrite table dim_user_info_his partition(pt='9999-12-31')
select
user_id
,login_name
,nick_name
,md5(user_name)
,md5(phone_num)
,md5(email)
,head_img
,user_level
,birthday
,gender
,status
,create_time
,update_time
,'2022-01-01' start_date
,'9999-12-31' end_date
from project.ods_user_info_incr_stg
(3)每日装载(x)
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
with tmp as
(
select
t1.user_id as user_id1
, t1.login_name as login_name1
, t1.nick_name as nick_name1
, t1.name as name1
, t1.phone_num as phone_num1
, t1.email as email1
, t1.head_img as head_img1
, t1.user_level as user_level1
, t1.birthday as birthday1
, t1.gender as gender1
, t1.create_time as create_time1
, t1.update_time as update_time1
, t1.statuss as statuss1
, t1.start_date as start_date1
, t1.end_date as end_date1
, t2.user_id as user_id2
, t2.login_name as login_name2
, t2.nick_name as nick_name2
, t2.name as name2
, t2.phone_num as phone_num2
, t2.email as email2
, t2.head_img as head_img2
, t2.user_level as user_level2
, t2.birthday as birthday2
, t2.gender as gender2
, t2.create_time as create_time2
, t2.update_time as update_time2
, t2.statuss as statuss2
, t2.start_date as start_date2
, t2.end_date as end_date2
from
(
SELECT user_id, login_name, nick_name, name, phone_num, email, head_img, user_level, birthday, gender, create_time, update_time, `status` as statuss,start_date, end_date
FROM project.dim_user_info_his
where pt ='9999-12-31'
) t1
full join
(
SELECT user_id, login_name, nick_name, user_name as name , phone_num, email, head_img, user_level, birthday, gender, create_time, update_time, `status` as statuss,'2022-01-02' as start_date,'9999-12-31' as end_date
FROM project.ods_user_info_incr_stg
)t2
on t1.user_id = t2.user_id
)
insert overwrite table dim_user_info_his partition(pt)
select
if(user_id2 is not null,user_id2,user_id1)
, if(login_name2 is not null,login_name2,login_name1)
, if(nick_name2 is not null,nick_name2,nick_name1)
, if(name2 is not null,name2,name1)
, if(phone_num2 is not null,phone_num2,phone_num1)
, if(email2 is not null,email2,email1)
, if(head_img2 is not null,head_img2,head_img1)
, if(user_level2 is not null,user_level2,user_level1)
, if(birthday2 is not null,birthday2,birthday1)
, if(gender2 is not null,gender2,gender1)
, if(create_time2 is not null,create_time2,create_time1)
, if(update_time2 is not null,update_time2,update_time1)
, if(statuss2 is not null,statuss2,statuss1)
, if(start_date2 is not null,start_date2,start_date1)
, if(end_date2 is not null,end_date2,end_date1)
, if(end_date2 is not null,end_date2,end_date1) as pt
from tmp
union all
select
user_id1
, login_name1
, nick_name1
, name1
, phone_num1
, email1
, head_img1
, user_level1
, birthday1
, gender1
, create_time1
, update_time1
, statuss1
, start_date1
, '2022-01-01' as end_date1
, '2022-01-01' as pt
from tmp
where user_id1 is not null and user_id1 is not null
(4)每日装载
set hive.exec.dynamic.partition.mode=true;
with user_info_tmp as
(
select
dim.user_id
,dim.login_name
,dim.nick_name
,dim.name
,dim.phone_num
,dim.email
,dim.head_img
,dim.user_level
,dim.birthday
,dim.gender
,dim.status
,dim.create_time
,dim.update_time
,dim.start_date
,case when ods.user_id is not null and dim.end_date='9999-12-31' then date_sub('2022-01-02',1)
else dim.end_date end end_date
from (select
user_id
,login_name
,nick_name
,name
,phone_num
,email
,head_img
,user_level
,birthday
,gender
,status
,create_time
,update_time
,start_date
,end_date
from dim_user_info_his
where pt='9999-12-31') dim
left join ods_user_info_incr_stg ods
on dim.user_id = ods.user_id
union all
select
user_id
,login_name
,nick_name
,md5(user_name)
,md5(phone_num)
,md5(email)
,head_img
,user_level
,birthday
,gender
,status
,create_time
,update_time
,'2022-01-02' start_date
,'9999-12-31' end_date
from ods_user_info_incr_stg
)
insert overwrite table dim_user_info_his partition(pt)
select
user_id
,login_name
,nick_name
,name
,phone_num
,email
,head_img
,user_level
,birthday
,gender
,status
,create_time
,update_time
,start_date
,end_date
,end_date
from user_info_tmp
;
版权声明
本文为[Listener6666]所创,转载请带上原文链接,感谢
https://blog.csdn.net/qq_43476430/article/details/124348276
边栏推荐
- Nanny level tutorial on building personal home page (II)
- Leetcode39 combined sum
- How to configure iptables to realize local port forwarding
- tp6阿里云短信 window 报 cURL error 60: SSL certificate problem: unable to get local issuer certificate
- App optimization and advanced scoreboard Part 2 [Mui + flask + mongodb]
- 89 logistic回歸用戶畫像用戶響應度預測
- R language advanced | generalized vector and attribute analysis
- 小程序 canvas 画布半圆环
- 009_Redis_RedisTemplate入门
- 用TensorFlow实现线性回归(包括过程中出现的问题及解决方法)
猜你喜欢
What is a dial-up server and what is its use?
Gray scale range corresponding to colors (red, yellow, green, blue, purple, pink, brick red and magenta) in HSV color space
005_redis_set集合
89 logistic回歸用戶畫像用戶響應度預測
Leetcode46 Full Permutation
Want to experience homekit smart home? Why don't you take a look at this smart ecosystem
简洁开源的一款导航网站源码
009_ Redis_ Getting started with redistemplate
什么是bgp服务器,有哪些优势?
012_ Access denied for user ‘root‘@‘localhost‘ (using password: YES)
随机推荐
一加一为什么等于二
007_Redis_Jedis连接池
Common formatting problems after word writing
Numerical remapping method (remap)
How to set computer IP?
Applet canvas canvas half ring
[hands on learning] network depth v2.1 Sequence model
Talk about biology live broadcast: Dr. Wang Ziyuan, a lake view biology, exploring hepatitis B with gene therapy
PTA: Romantic reflection [binary tree reconstruction] [depth first traversal]
Tp6 Alibaba cloud SMS window reports curl error 60: SSL certificate problem: unable to get local issuer certificate
Is CICC fortune a state-owned enterprise and is it safe to open an account
What are the test steps of dynamic proxy IP?
011_RedisTemplate操作Hash
R language advanced | generalized vector and attribute analysis
On LAN
They are all intelligent in the whole house. What's the difference between aqara and homekit?
Dynamic batch processing and static batch processing of unity
89 logistic regression user portrait user response prediction
Want to experience homekit smart home? Why don't you take a look at this smart ecosystem
What business scenarios will the BGP server be used in?