当前位置:网站首页>Data warehouse construction table 111111
Data warehouse construction table 111111
2022-04-23 02:15:00 【Listener6666】
One 、dim layer
1、 Commodity information table
(1) Create table statement
drop table if EXISTS dim_sku_info_df;
CREATE TABLE if not EXISTS dim_sku_info_df(
sku_id string comment ' Inventory unit id',
price double comment ' Price ',
sku_name string comment ' Inventory unit name ',
sku_desc string comment ' Product specification description ',
weight double comment ' weight ',
is_sale string comment ' Whether to sell (1: yes 0: no )',
spu_id string comment ' goods id',
spu_name string comment ' Name of commodity ',
brand_id string comment ' brand id',
brand_name string comment ' The brand name ',
is_used string comment ' Whether it works 1 It works 0 invalid ' ,
banned_time string comment ' Closure time ',
is_domestic string comment ' Is it a domestic brand 1 yes 0 no ',
logo_url string comment ' brand logo Picture path for ',
class_name_3 string comment ' The name of the third level classification ',
class_id_3 string comment ' Three levels of classification ID',
class_name_2 string comment ' Secondary classification name ',
class_id_2 string comment ' Secondary classification ID',
class_name_1 string comment ' First class classification name ',
class_id_1 string comment ' First level classification ID',
create_time string comment ' Creation time ',
update_time string comment ' Modification time '
)
comment ' Commodity information table '
PARTITIONED BY (
pt string comment ' date '
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC tblproperties("orc.compress"="SNAPPY")
;
(2) Derivative statement
with sku AS
(SELECT
sku_id
,spu_id
,price
,sku_name
,sku_desc
,weight
,brand_id
,category3_id
,sku_img
,is_sale
,create_time
,update_time
,pt
from project.ods_sku_info_full where pt = '2022-01-01'),
spu as
(select
spu_id
,spu_name
from project.ods_spu_info_full where pt = '2022-01-01'),
brand as
(select
brand_id
,brand_name
,is_used
,banned_time
,is_domestic
,logo_url
from project.ods_pub_brand_full where pt = '2022-01-01'),
class as
(select
l3.class_id as class_id_3
,l3.class_name as class_name_3
,l2.class_id as class_id_2
,l2.class_name as class_name_2
,l1.class_id as class_id_1
,l1.class_name as class_name_1
from (select * from project.ods_pub_class_full where pt='2022-01-01' and `level`= '1') l1
left join (select * from project.ods_pub_class_full where pt='2022-01-01' and `level`='2') l2
on l1.class_id = l2.parent_id
left join (select * from project.ods_pub_class_full where pt='2022-01-01' and `level`='3') l3
on l2.class_id = l3.parent_id )
insert overwrite table project.dim_sku_info_df partition(pt='2022-01-01')
select
sku.sku_id,
sku.price,
sku.sku_name,
sku.sku_desc,
sku.weight,
sku.is_sale,
sku.spu_id,
spu.spu_name,
brand.brand_id,
brand.brand_name,
brand.is_used,
brand.banned_time,
brand.is_domestic,
brand.logo_url,
class.class_name_3,
class.class_id_3,
class.class_name_2,
class.class_id_2,
class.class_name_1,
class.class_id_1,
sku.create_time,
sku.update_time
from sku
left join spu
on sku.spu_id = spu.spu_id
left join brand
on sku.brand_id = brand.brand_id
left join class
on sku.category3_id = class.class_id_3 ;
2、 Detailed attribute table of goods
(1) Create table statement
drop table if EXISTS dim_sku_para_info_df;
CREATE TABLE if not EXISTS dim_sku_para_info_df(
id string comment 'id'
,sku_id string comment ' Inventory unit id'
,spu_id string comment ' goods id'
,para_id string comment 'sku Parameters id'
,attr_id string comment ' attribute id'
,attr_name string comment ' The attribute name '
,value string comment ' Property value '
)
comment ' Detailed attribute table of goods '
PARTITIONED BY (
pt string comment ' date '
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC tblproperties("orc.compress"="SNAPPY")
;
(2) Derivative statement
with sp AS
(select
id
,sku_id
,spu_id
,para_id
from project.ods_sku_para_full where pt = '2022-01-01'),
spi as
(select
para_id
,attr_id
,attr_name
,value
from project.ods_sku_para_info_full where pt = '2022-01-01')
insert overwrite table dim_sku_para_info_df partition(pt='2022-01-01')
select
sp.id
,sp.sku_id
,sp.spu_id
,sp.para_id
,spi.attr_id
,spi.attr_name
,spi.value
from sp
left join spi
on sp.para_id = spi.para_id
;
3、 Activity information table
(1) Create table statement
drop table if exists dim_activity_info_df;
create external table dim_activity_info_df
( id string comment 'id',
activity_id string comment ' Activities id',
activity_name string comment ' The name of the event ',
activity_type_code string comment ' Activity type code ',
activity_type_name string comment ' Activity type name ',
activity_desc string comment ' Activity description ',
activity_rules string comment ' Activity rules ',
content_amount double comment ' Amount satisfied ',
reduce_amount int comment ' The amount of activity relief ',
content_cnt double comment ' Number of pieces satisfied ',
reduce_rate double comment ' Discount for event reduction ',
activity_level string comment ' Preferential grade ',
start_time string comment ' Starting time ',
end_time string comment ' End time ',
create_time string comment ' Creation time ',
update_time string comment ' Modification time '
) comment ' Activity information table '
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;
(2) Derivative statement
with act as
(
select
id
,activity_id
,activity_name
,activity_type
,activity_desc
,case activity_type
when '3101' then concat(' full ',content_amount,' Yuan reduction ',reduce_amount,' element ')
when '3102' then concat(' full ',content_cnt,' A dozen ',10*(1-reduce_rate),' fold ')
end activity_rules
,content_amount
,reduce_amount
,content_cnt
,reduce_rate
,activity_level
,start_time
,end_time
,create_time
,update_time
from project.ods_activity_info_full where pt='2022-01-01'
),
code as
(
select
code_id
,code_name
from project.ods_pub_code_full where pt='2022-01-01'
)
insert overwrite table dim_activity_info_df partition(pt='2022-01-01')
select
id
,activity_id
,activity_name
,activity_type
,code.code_name
,activity_desc
,case activity_type
when '3101' then concat(' full ',content_amount,' Yuan reduction ',reduce_amount,' element ')
when '3102' then concat(' full ',content_cnt,' A dozen ',10*(1-reduce_rate),' fold ')
end activity_rules
,content_amount
,reduce_amount
,content_cnt
,reduce_rate
,activity_level
,start_time
,end_time
,create_time
,update_time
from act
left join code
on act.activity_type = code.code_id
;
4、 Coupon information form
(1) Create table statement
drop table if exists dim_coupon_info_df;
create table dim_coupon_info_df
(
coupon_id string comment ' Shopping voucher id'
,coupon_name string comment ' Name of the shopping voucher '
,coupon_type string comment ' Type of shopping voucher '
,coupon_type_name string comment ' Type name of shopping voucher '
,content_amount double comment ' Amount satisfied '
,reduce_amount double comment ' The amount of coupon reduction '
,range_type string comment ' Coupon type range '
,class_id_3 string comment ' Three levels of classification id'
,start_time string comment ' Coupon use start date '
,end_time string comment ' Coupon usage end date '
,create_time string comment ' Creation time '
,update_time string comment ' Modification time '
,etl_load_time string comment 'etl Loading time '
) comment ' Coupon information form '
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;
(2) Derivative statement
with coupon as
(
select
coupon_id
,coupon_name
,coupon_type
,content_amount
,reduce_amount
,range_type
,category3_id
,start_time
,end_time
,create_time
,update_time
,current_timestamp()
from project.ods_coupon_info_full
where pt='2022-01-01'
)
,
code as
(
select
code_id
,code_name
from project.ods_pub_code_full where pt='2022-01-01'
)
insert overwrite table dim_coupon_info_df partition(pt='2022-01-01')
select
coupon_id
,coupon_name
,coupon_type
,code_name
,content_amount
,reduce_amount
,range_type
,category3_id
,start_time
,end_time
,create_time
,update_time
,current_timestamp()
from coupon
left join code
on coupon.coupon_type = code.code_id
;
5、 Area table
(1) Create table statement
drop table if exists dim_pub_area_df;
create table dim_pub_area_df
(
province_id string comment 'id'
,province_name string comment ' Provincial and municipal name '
,area_code string comment ' Area code '
,iso_code_old string comment ' Old region code '
,iso_code_new string comment ' New regional code '
,region_id string comment ' region id'
,region_name string comment ' Area name '
,etl_load_time string comment 'etl Loading time '
) comment ' Area table '
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;
(2) Derivative statement
insert overwrite table dim_pub_area_df partition(pt='2022-01-01')
select
province_id
,province_name
,area_code
,iso_code
,iso_3166_2
,region_id
,region_name
,current_timestamp()
from project.ods_pub_area_full
where pt='2022-01-01'
6、 Business information form
(1) Create table statement
drop table if exists dim_business_info_df;
create table dim_business_info_df
(
bus_id string comment ' merchants id'
,bus_name string comment ' Business name '
,telephone string comment ' contact number '
,pic_url string comment ' Picture address URL'
,bus_user_name double comment ' Merchant contact '
,province double comment ' Province '
,city string comment ' City '
,dist string comment ' District '
,addr string comment ' Detailed address '
,zipcode string comment ' Zip code '
,is_open string comment ' Whether to close the store 1 Put up the shutters 0 Not closed '
,open_time string comment ' Opening time '
,closed_time string comment ' Closing time '
,create_time string comment ' Creation time '
,update_time string comment ' Modification time '
,etl_load_time string comment 'etl Loading time '
) comment ' Coupon information form '
partitioned by (pt string)
stored as orc
tblproperties ('orc.compress' = 'snappy')
;
(2) Derivative statement
insert overwrite table dim_business_info_df partition(pt='2022-01-01')
select
bus_id
,bus_name
,md5(telephone)
,pic_url
,md5(bus_user_name)
,province
,city
,dist
,addr
,zipcode
,is_open
,open_time
,closed_time
,create_time
,update_time
,current_timestamp()
from project.ods_business_info_full
where pt ='2022-01-01'
;
7、 User information sheet ( Zipper table )
(1) Create table statement
DROP TABLE IF EXISTS dim_user_info_his;
CREATE EXTERNAL TABLE dim_user_info_his
(
user_id STRING COMMENT ' user id'
,login_name STRING COMMENT ' User name '
,nick_name STRING COMMENT ' The user nickname '
,name STRING COMMENT ' User name '
,phone_num STRING COMMENT ' Phone number '
,email STRING COMMENT ' mailbox '
,head_img STRING COMMENT ' Head portrait address '
,user_level STRING COMMENT ' User level '
,birthday STRING COMMENT ' Birthday '
,gender STRING COMMENT ' Gender '
,status STRING COMMENT ' state 1- It works ,0- Invalid ,2- The blacklist '
,create_time STRING COMMENT ' Creation time '
,update_time STRING COMMENT ' Operating time '
,start_date STRING COMMENT ' Start date '
,end_date STRING COMMENT ' End date '
) COMMENT ' User table '
PARTITIONED BY (pt STRING)
STORED AS ORC
TBLPROPERTIES ('orc.compress' = 'snappy');
(2) First day load statement
insert overwrite table dim_user_info_his partition(pt='9999-12-31')
select
user_id
,login_name
,nick_name
,md5(user_name)
,md5(phone_num)
,md5(email)
,head_img
,user_level
,birthday
,gender
,status
,create_time
,update_time
,'2022-01-01' start_date
,'9999-12-31' end_date
from project.ods_user_info_incr_stg
(3) Daily load (x)
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
with tmp as
(
select
t1.user_id as user_id1
, t1.login_name as login_name1
, t1.nick_name as nick_name1
, t1.name as name1
, t1.phone_num as phone_num1
, t1.email as email1
, t1.head_img as head_img1
, t1.user_level as user_level1
, t1.birthday as birthday1
, t1.gender as gender1
, t1.create_time as create_time1
, t1.update_time as update_time1
, t1.statuss as statuss1
, t1.start_date as start_date1
, t1.end_date as end_date1
, t2.user_id as user_id2
, t2.login_name as login_name2
, t2.nick_name as nick_name2
, t2.name as name2
, t2.phone_num as phone_num2
, t2.email as email2
, t2.head_img as head_img2
, t2.user_level as user_level2
, t2.birthday as birthday2
, t2.gender as gender2
, t2.create_time as create_time2
, t2.update_time as update_time2
, t2.statuss as statuss2
, t2.start_date as start_date2
, t2.end_date as end_date2
from
(
SELECT user_id, login_name, nick_name, name, phone_num, email, head_img, user_level, birthday, gender, create_time, update_time, `status` as statuss,start_date, end_date
FROM project.dim_user_info_his
where pt ='9999-12-31'
) t1
full join
(
SELECT user_id, login_name, nick_name, user_name as name , phone_num, email, head_img, user_level, birthday, gender, create_time, update_time, `status` as statuss,'2022-01-02' as start_date,'9999-12-31' as end_date
FROM project.ods_user_info_incr_stg
)t2
on t1.user_id = t2.user_id
)
insert overwrite table dim_user_info_his partition(pt)
select
if(user_id2 is not null,user_id2,user_id1)
, if(login_name2 is not null,login_name2,login_name1)
, if(nick_name2 is not null,nick_name2,nick_name1)
, if(name2 is not null,name2,name1)
, if(phone_num2 is not null,phone_num2,phone_num1)
, if(email2 is not null,email2,email1)
, if(head_img2 is not null,head_img2,head_img1)
, if(user_level2 is not null,user_level2,user_level1)
, if(birthday2 is not null,birthday2,birthday1)
, if(gender2 is not null,gender2,gender1)
, if(create_time2 is not null,create_time2,create_time1)
, if(update_time2 is not null,update_time2,update_time1)
, if(statuss2 is not null,statuss2,statuss1)
, if(start_date2 is not null,start_date2,start_date1)
, if(end_date2 is not null,end_date2,end_date1)
, if(end_date2 is not null,end_date2,end_date1) as pt
from tmp
union all
select
user_id1
, login_name1
, nick_name1
, name1
, phone_num1
, email1
, head_img1
, user_level1
, birthday1
, gender1
, create_time1
, update_time1
, statuss1
, start_date1
, '2022-01-01' as end_date1
, '2022-01-01' as pt
from tmp
where user_id1 is not null and user_id1 is not null
(4) Daily load
set hive.exec.dynamic.partition.mode=true;
with user_info_tmp as
(
select
dim.user_id
,dim.login_name
,dim.nick_name
,dim.name
,dim.phone_num
,dim.email
,dim.head_img
,dim.user_level
,dim.birthday
,dim.gender
,dim.status
,dim.create_time
,dim.update_time
,dim.start_date
,case when ods.user_id is not null and dim.end_date='9999-12-31' then date_sub('2022-01-02',1)
else dim.end_date end end_date
from (select
user_id
,login_name
,nick_name
,name
,phone_num
,email
,head_img
,user_level
,birthday
,gender
,status
,create_time
,update_time
,start_date
,end_date
from dim_user_info_his
where pt='9999-12-31') dim
left join ods_user_info_incr_stg ods
on dim.user_id = ods.user_id
union all
select
user_id
,login_name
,nick_name
,md5(user_name)
,md5(phone_num)
,md5(email)
,head_img
,user_level
,birthday
,gender
,status
,create_time
,update_time
,'2022-01-02' start_date
,'9999-12-31' end_date
from ods_user_info_incr_stg
)
insert overwrite table dim_user_info_his partition(pt)
select
user_id
,login_name
,nick_name
,name
,phone_num
,email
,head_img
,user_level
,birthday
,gender
,status
,create_time
,update_time
,start_date
,end_date
,end_date
from user_info_tmp
;
版权声明
本文为[Listener6666]所创,转载请带上原文链接,感谢
https://yzsam.com/2022/04/202204230210423535.html
边栏推荐
- Log4j2 configuration
- Realize linear regression with tensorflow (including problems and solutions in the process)
- 【Chrome扩展程序】content_script的跨域问题
- 手写内存池以及原理代码分析【C语言】
- 数仓建表111111
- 不断下沉的咖啡业,是虚假的繁荣还是破局的前夜?
- 89 logistic回歸用戶畫像用戶響應度預測
- Nanny level tutorial on building personal home page (II)
- C # import details
- JSP page nesting
猜你喜欢
随机推荐
Dynamic memory management
What is BGP server and what are its advantages?
011_ Redistemplate operation hash
Synchronized锁及其膨胀
都是做全屋智能的,Aqara和HomeKit到底有什么不同?
数仓建表111111
Explain JS prototype and prototype chain in detail
Unity editor hierarchy drop-down menu extension
【汇编语言】从最底层的角度理解“堆栈”
[NK] Niuke monthly race 48 D
tp6阿裏雲短信 window 報 cURL error 60: SSL certificate problem: unable to get local issuer certificate
Log4j2 configuration
How to call out services in idea and display the startup class in services
leetcode:27. Remove element [count remove]
IAR嵌入式開發STM32f103c8t6之點亮LED燈
What businesses use physical servers?
89 logistic回归用户画像用户响应度预测
Common formatting problems after word writing
从0开始开发一个chrome插件(2)
Hyperscan -- 2 compilation









