ClickHouse简单语句

本地表

CREATE TABLE enic_test_local
(
    id    Int16,
    name  String,
    birth Date
) ENGINE = MergeTree()
      PARTITION BY toYYYYMM(birth)
      ORDER BY id;
insert into enic_test_local
values (1, 'test1', '2022-02-01'),
       (2, 'test2', '2022-03-01'),
       (3, 'test3', '2022-05-01'),
       (4, 'test3', '2022-06-01'),
       (5, 'test3', '2022-07-01'),
       (6, 'test3', '2022-08-01'),
       (7, 'test3', '2022-09-01'),
       (8, 'test3', '2022-10-01'),
       (9, 'test3', '2022-11-01');

select * from enic_test_local;

分布表

-- 分布表需先在集群创建本地表
CREATE TABLE default.tb_test_local ON CLUSTER wxpt_cluster
(
    id    UInt64,
    name  String,
    birth Date
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/default/tb_test_local', '{replica}')
      PARTITION BY toYYYYMM(birth)
      ORDER BY id;
-- 其中{shard}与{replica}是占位字段,服务会默认去取配置文件中的shard与replica字段,并在zk上创建对应路径
-- ON CLUSTER关键词依赖于zk服务,含义是在集群的所有节点创建该表

-- 创建分布表
-- id 为分布键
CREATE TABLE IF NOT EXISTS default.tb_test_all ON CLUSTER wxpt_cluster ENGINE = Distributed(wxpt_cluster, default, tb_test_local ,id);

-- 向分布表中插入数据
insert into default.tb_test_all
values (1, 'test1', '2022-02-01'),
       (2, 'test2', '2022-03-01'),
       (3, 'test3', '2022-05-01'),
       (4, 'test3', '2022-06-01'),
       (5, 'test3', '2022-07-01'),
       (6, 'test3', '2022-08-01'),
       (7, 'test3', '2022-09-01'),
       (8, 'test3', '2022-10-01'),
       (9, 'test3', '2022-11-01');
-- ck会根据sharing字段自动分配数据到指定的分片

其他一些语句

-- 变更列
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...

-- 删除分区,集群分布表不会立即删除,而是在某个时机删除
ALTER TABLE raw_layer.tb_st_user_label_new_local ON CLUSTER wxpt_cluster DROP PARTITION '20220825';
CREATE DATABASE report ON CLUSTER wxpt_cluster;

CREATE TABLE report.tb_home_page_touch_collect_local ON CLUSTER wxpt_cluster
(
    ACTIVITY_ID      String COMMENT '活动ID',
    RECOMMEND_TYPE   String COMMENT '推荐类型',
    ACTIVITY_NAME    String COMMENT '活动名称',
    ACTIVITY_START   DateTime COMMENT '活动开始时间',
    ACTIVITY_END     DateTime COMMENT '活动结束时间',
    CREATOR          String COMMENT '创建人',
    ORG_NAME         String COMMENT '组织名称',
    GOODS_ID         String COMMENT '商品ID',
    GOODS_TABLE_ID   UInt64 COMMENT '商品表ID',
    SECTION_TYPE     String COMMENT '业务类型',
    FIRST_DATA_TYPE  String COMMENT '一级分类',
    SECOND_DATA_TYPE String COMMENT '二级分类',
    POSITION_CODE    String COMMENT '位置编码',
    POSITION_NAME    String COMMENT '位置名称',
    PHONE_NUMBER     UInt64 COMMENT '手机号码',
    RECOM_PV         UInt64 COMMENT '推荐数',
    CLICK_PV         UInt64 COMMENT '点击数',
    ORDER_PV         UInt64 COMMENT '订单数',
    GOODS_URL        String COMMENT '商品URL',
    GOODS_NAME       String COMMENT '商品名称',
    GOODS_CHANNEL    String COMMENT '商品触点编码',
    ACT_ORDER_PV     UInt64 COMMENT '活动级订单数',
    OPERA_DAY        String COMMENT '统计日期'
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/report/tb_home_page_touch_collect_local',
           '{replica}')
      PARTITION BY OPERA_DAY
      ORDER BY (PHONE_NUMBER, ACTIVITY_ID, GOODS_TABLE_ID, POSITION_CODE)
      PRIMARY KEY (PHONE_NUMBER, ACTIVITY_ID, GOODS_TABLE_ID, POSITION_CODE);


CREATE TABLE report.tb_home_page_touch_collect
    ON CLUSTER wxpt_cluster AS report.tb_home_page_touch_collect_local ENGINE = Distributed(wxpt_cluster, report, tb_home_page_touch_collect_local ,intHash64(PHONE_NUMBER));

CREATE TABLE report.tb_home_page_touch_order_act_collect_local ON CLUSTER wxpt_cluster
(
    POSITION_NAME    String,
    ORG_NAME         String,
    SECTION_TYPE     String,
    FIRST_DATA_TYPE  String,
    SECOND_DATA_TYPE String,
    OPERA_FLAG       String,
    ORDER_PV         UInt64,
    RECOMMEND_TYPE   String,
    OPERA_DAY        String,
    OPERA_TYPE       String
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/report/tb_home_page_touch_order_act_collect_local',
           '{replica}')
      PARTITION BY OPERA_DAY
      ORDER BY (POSITION_NAME, ORG_NAME, SECTION_TYPE, FIRST_DATA_TYPE, SECOND_DATA_TYPE, OPERA_FLAG, RECOMMEND_TYPE,
                OPERA_DAY, OPERA_TYPE)
      PRIMARY KEY (POSITION_NAME, ORG_NAME, SECTION_TYPE, FIRST_DATA_TYPE, SECOND_DATA_TYPE, OPERA_FLAG, RECOMMEND_TYPE,
                   OPERA_DAY, OPERA_TYPE);

CREATE TABLE IF NOT EXISTS report.tb_home_page_touch_order_act_collect
    ON CLUSTER wxpt_cluster AS report.tb_home_page_touch_order_act_collect_local ENGINE = Distributed(wxpt_cluster, report, tb_home_page_touch_order_act_collect_local ,cityHash64(POSITION_NAME, ORG_NAME, SECTION_TYPE, FIRST_DATA_TYPE, SECOND_DATA_TYPE, OPERA_FLAG, RECOMMEND_TYPE, OPERA_DAY, OPERA_TYPE));

WITH T_AVG AS (SELECT POSITION_NAME,
                      avg(R_P)                             A_R_P,
                      avg(R_U)                             A_R_U,
                      avg(C_P)                             A_C_P,
                      avg(if(C_U + 1 = R_U, C_U + 1, C_U)) A_C_U
               FROM (SELECT POSITION_NAME,
                            OPERA_DAY,
                            SUM(RECOM_PV)                                      R_P,
                            groupBitmap(PHONE_NUMBER)                          R_U,
                            SUM(CLICK_PV)                                      C_P,
                            groupBitmap(IF(CLICK_PV > 0, PHONE_NUMBER, 0)) - 1 C_U
                     FROM report.tb_home_page_touch_collect
                     WHERE OPERA_DAY = '20220701'
                     GROUP BY OPERA_DAY, POSITION_NAME) TEMP
               GROUP BY POSITION_NAME),
     T_DEL AS (SELECT POSITION_NAME,
                      R_P                             D_R_P,
                      R_U                             D_R_U,
                      C_P                             D_C_P,
                      if(C_U + 1 = R_U, C_U + 1, C_U) D_C_U
               FROM (SELECT POSITION_NAME,
                            SUM(RECOM_PV)                                      R_P,
                            groupBitmap(PHONE_NUMBER)                          R_U,
                            SUM(CLICK_PV)                                      C_P,
                            groupBitmap(IF(CLICK_PV > 0, PHONE_NUMBER, 0)) - 1 C_U
                     FROM report.tb_home_page_touch_collect
                     WHERE OPERA_DAY = '20220701'
                     GROUP BY POSITION_NAME) TEMP),
     T_ODR AS (SELECT POSITION_NAME, SUM(ORDER_PV_2) ORDER_PV_3, AVG(ORDER_PV_2) A_O_P
               FROM (SELECT OPERA_DAY, POSITION_NAME, SUM(ORDER_PV_1) ORDER_PV_2
                     FROM (SELECT OPERA_DAY,
                                  POSITION_NAME,
                                  OPERA_FLAG,
                                  MAX(ORDER_PV) ORDER_PV_1
                           FROM report.tb_home_page_touch_order_act_collect
                           WHERE OPERA_DAY = '20220701'
                             AND OPERA_TYPE = 'ODR'
                           GROUP BY OPERA_DAY, POSITION_NAME, OPERA_FLAG) TEMP
                     GROUP BY OPERA_DAY, POSITION_NAME) TEMP
               GROUP BY POSITION_NAME),
     T_ACT AS (SELECT POSITION_NAME, SUM(ACT_NUMS) ACT_PV, AVG(ACT_NUMS) A_A_P
               FROM (SELECT OPERA_DAY, POSITION_NAME, COUNT(1) ACT_NUMS
                     FROM (SELECT OPERA_DAY,
                                  POSITION_NAME,
                                  OPERA_FLAG
                           FROM report.tb_home_page_touch_order_act_collect
                           WHERE OPERA_DAY = '20220701'
                             AND OPERA_TYPE = 'ACT'
                           GROUP BY OPERA_DAY, POSITION_NAME, OPERA_FLAG) TEMP
                     GROUP BY OPERA_DAY, POSITION_NAME) TEMP
               GROUP BY POSITION_NAME)
SELECT T_AVG.POSITION_NAME                                                          positionName,
       ifnull(T_ACT.A_A_P, 0)                                                       avgActNums,
       T_AVG.A_R_U                                                                  avgRecomUvNums,
       T_AVG.A_R_P                                                                  avgRecomPvNums,
       ifnull(T_AVG.A_C_U, 0)                                                       avgClickUvNums,
       ifnull(T_AVG.A_C_P, 0)                                                       avgClickPvNums,
       ifnull(T_AVG.A_C_U, 0) / T_AVG.A_R_U                                         acgUvClickPercent,
       ifnull(T_AVG.A_C_P, 0) / T_AVG.A_R_P                                         acgPvClickPercent,
       ifnull(T_ODR.A_O_P, 0)                                                       avgOrderNums,
       if(ifnull(T_AVG.A_C_P, 0) = 0, 0, ifnull(T_ODR.A_O_P, 0) / T_AVG.A_C_P)      avgOrderPercent,
       ifnull(T_ACT.ACT_PV, 0)                                                      actNums,
       ifnull(T_DEL.D_R_U, 0)                                                       recomUvNums,
       ifnull(T_DEL.D_R_P, 0)                                                       recomPvNums,
       ifnull(T_DEL.D_C_U, 0)                                                       clickUvNums,
       ifnull(T_DEL.D_C_P, 0)                                                       clickPvNums,
       ifnull(T_DEL.D_C_U, 0) / T_DEL.D_R_U                                         clickUvPercent,
       ifnull(T_ODR.ORDER_PV_3, 0)                                                  orderNums,
       if(ifnull(T_DEL.D_C_P, 0) = 0, 0, ifnull(T_ODR.ORDER_PV_3, 0) / T_DEL.D_C_P) orderPercent,
       multiIf(T_AVG.POSITION_NAME = '精准营销展示位一', 1, T_AVG.POSITION_NAME = '精准营销展示位二', 2,
               T_AVG.POSITION_NAME = '精准营销展示位三', 3, T_AVG.POSITION_NAME = '生态组件展示位一', 4,
               T_AVG.POSITION_NAME = '生态组件展示位二', 5, 6)                      RANK
FROM T_AVG
         LEFT JOIN T_DEL ON T_AVG.POSITION_NAME = T_DEL.POSITION_NAME
         LEFT JOIN T_ODR ON T_AVG.POSITION_NAME = T_ODR.POSITION_NAME
         LEFT JOIN T_ACT ON T_AVG.POSITION_NAME = T_ACT.POSITION_NAME
ORDER BY RANK;


时至今日,你依旧是我的光芒。