ck使用小技巧

mysql使用小技巧
pg使用小技巧
ck使用小技巧
SQL函数 — ClickHouse官方文档

ck 使用的时候会有很多坑,或者说是使用小技巧(临时先放着)

数值计算

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
-- 四舍五入   保留几位小数   设置默认值
round(toDecimal128(ifNull(toString(sales_amt_actual_sum), '0'), 5)) sales,-- 默认0,四舍五入取整
round(toDecimal128(ifNull(toString(target_value_sum), '0'), 5)) target,
round(intDivOrZero(sales, target), 3) finish_rate-- 四舍五入,保留3位小数 #.#%



-- 除法,被除数可以为 0
-- 防止损失精度 要把分子分母都转为 Decimal 类型,精度 5位小数
SELECT
-- toDecimal128(xxx, 5)
-- toDecimal128(sum(xxx), 5)
-- intDivOrZero(xxx, xxx)
-- intDivOrZero(xxx, toDecimal128(sum(xxx),5))
-- intDivOrZero(toDecimal128(sum(xxx),5), toDecimal128(sum(xxx),5))
sum(retail_tot_amt_actual) sales,
count(retail_id) retail_count,
intDivOrZero(toDecimal128(sales,5), retail_count) atv,
intDivOrZero(toDecimal128(sum(retail_tot_qty),5), retail_count) upt,
intDivOrZero(toDecimal128(sales,5), toDecimal128(sum(retail_tot_amt_list),5)) md

字符串拼接

1
2
-- 两个字符串拼接 => "a b"
SELECT concat('a', ' ', 'b')

字符串替换/分割/一行变多行

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
-- SQL解释:下面 xxx 为占位符
-- position String ===> (288,532.4489361702127) (原始数据)
-- replaceRegexpAll(xxx, '\(|\)', '')) String ===> 288,532.4489361702127 (去除小括号)
-- splitByChar(',', cast(xxx as String))) String 数组 ===> ['288','532.4489361702127'] (字符串分割,转为 String数组 ===> 先转 String 防止报错误:Nested type Array(String) cannot be inside Nullable type)
-- arrayElement(xxx, 1) String ===> 288 (获取数组第1个元素)
-- arrayElement(xxx, 2) String ===> 532.4489361702127 (获取数组第2个元素)
SELECT
-- SQL解释:下面 xxx 为占位符
-- path_region_ids String ===> {3529,3531,3530,3531,3529} (原始数据)
-- replaceRegexpAll(xxx, '{|}', '')) String ===> 3529,3531,3530,3531,3529 (去除大括号)
-- splitByChar(',', xxx)) String 数组 ===> ['3529','3531','3530','3531','3529'] (字符串分割,转为 String数组)
-- arraySlice(xxx, 1, 4) String 数组 ===> ['3529','3531','3530','3531'] (从第1个元素开始,包含第1个元素,数组长度保留4)
-- arrayDistinct(xxx) String 数组 ===> ['3529','3531','3530'] (去重,为了更少的与右表进行关联)
-- arrayJoin(xxx) String 数组变 3 行,元素类型为 String
-- toInt16(xxx) 元素类型 String => int (为了方便join ===> t_store_region_config.region_path_id)
toInt16(
arrayJoin(
arrayDistinct(
arraySlice(
splitByChar(',', replaceRegexpAll(path_region_ids,'{|}','')) as arr_src,-- 原数组 ['3529','3531','3530','3531','3529']
1,4
) as arr_split-- 数组元素,最多4个 ['3529','3531','3530','3531']
) as arr_distinct-- 数组元素去重 ['3529','3531','3530']
) as arr_e_str-- 数组元素 字符串
) as arr_e_int,-- 数组元素 int
arr_split,
sum(activity_count) count
FROM t_store_region_path
GROUP BY arr_split
ORDER BY arr_split

json解析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
-- ifNull(fetch_target_json, '[]')      -- 字符串默认 '[]'
-- JSONExtractArrayRaw -- 转为json数组
-- arrayJoin(...) AS json -- 一行变多行
-- visitParamExtractString(json, 'featureCode') AS feature_code -- 获取json中的某个key对应值
-- toDecimal128(ifNull(visitParamExtractString(json, 'targetValue'), '0'), 5) AS target_value -- 获取对应值,默认 '0' 再转为 Decimal
-- AND feature_code='sales' -- 过滤
SELECT
store_id,
SUM(target_value) target_value_sum
FROM
(
SELECT
rel_id AS store_id,
arrayJoin(
JSONExtractArrayRaw(
ifNull(fetch_target_json, '[]')
)
) AS json,
visitParamExtractString(json, 'featureCode') AS feature_code,
toDecimal128(ifNull(visitParamExtractString(json, 'targetValue'), '0'), 5) AS target_value
FROM
t_sc_feature_daily_target_info
WHERE
tenant_id = $tenant_id$
AND rel_type = 'STORE'
AND rel_id IN (SELECT store_id FROM t_ads_store_scd WHERE tenant_id=$tenant_id$ AND if_create_sc = 1)
-- AND stats_date BETWEEN '2023-01-01' AND '2023-01-09'
AND stats_date BETWEEN toStartOfMonth(now()) AND yesterday()
AND feature_code='sales'
)
GROUP BY store_id ORDER BY store_id

日期

日期函数 — ClickHouse官方文档
类型转换 — ClickHouse官方文档
CK时间日期函数 — CSDN

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
-- 字符串 + 小时数
-- AND billtime between addHours(today(), 7) AND addHours(today(), 12)
AND billtime between addHours(toDate($report_date$), 7) AND addHours(toDate($report_date$), 12)

-- DateTime 类型 => 字符串"yyyy-MM-dd HH:mm:ss"
SELECT formatDateTime(billtime, '%F %T')
formatDateTime(rdd.billtime, '%H:%M') AS billtime,-- 订单时间 格式 "HH:mm"
-- formatDateTime(rdd.billtime, '%F %T') AS billtime,
SELECT toString(billtime)

-- dt Date类型
-- billtime 字符串"HH:mm:ss"
-- dt + billtime => 生成 DateTime 类型
SELECT toDateTime(concat(formatDateTime(dt, '%F'), ' ', billtime))


SELECT now() -- 2022-11-24 14:46:48.000
SELECT today() -- 2022-11-24
select yesterday() -- 2022-11-23
select toStartOfMonth(now()) -- 2022-11-01 当月1号

select toStartOfDay(now()) -- 2022-11-24 00:00:00.000
select toStartOfDay(today()) -- 2022-11-24 00:00:00.000
select toStartOfDay(yesterday()) -- 2022-11-23 00:00:00.000
select toDateTime(now()) -- 2022-11-24 14:51:42.000
select toDateTime(today()) -- 2022-11-24 00:00:00.000
select toDateTime(yesterday()) -- 2022-11-23 00:00:00.000
select formatDateTime(now(), '%F %T') -- 2022-11-24 14:56:03
select formatDateTime(now(), '%F') -- 2022-11-24
select formatDateTime(today(), '%F') -- 2022-11-24
select formatDateTime(yesterday(), '%F') -- 2022-11-23
select toString(now()) -- 2022-11-24 14:59:17
select toString(today()) -- 2022-11-24
select toString(yesterday()) -- 2022-11-23

-- 小时
-- 字符串拼接 'yyyy-MM-dd HH:mm:ss' concat('2023-01-01', ' ', '12:13:14')
-- 'yyyy-MM-dd HH:mm:ss' => DateTime toDateTime('2023-01-01 12:13:14')
-- DateTime => 小时数 Uint8 toHour(toDateTime('2023-01-01 12:13:14'))
SELECT
-- ...
concat('2023-01-01', ' ', first_order_time) billtime,
FROM t_ads_store_daily_stats
WHERE
-- ...
AND toHour(toDateTime(billtime)) BETWEEN 7 AND 10

排名函数

好多人说,为什么ck没有排名函数,其实有的,网上都没有给出相关案例。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
-- 排名函数
-- 2022-10-26 09:49:21
SELECT
value,
rank() over win as rk
FROM (
SELECT arrayJoin([11, 22, 22, 33]) value
) window win as ( order by value desc)
ORDER BY rk
SETTINGS allow_experimental_window_functions = 1
-- value|rk|
-- -----+--+
-- 33| 1|
-- 22| 2|
-- 22| 2|
-- 11| 4|

--------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------

-- 再举一个例子:按照学生成绩进行排名

-- 1. 创建表
CREATE TABLE student
(
`id` UInt32,
`name` String,
`score` UInt32
)
ENGINE = MergeTree
ORDER BY id

-- 2. 插入5条数据
insert into student values
(1, '轩昂', 651),
(2, '冷峰', 555),
(3, '辰阳', 543),
(4, '浩淼', 456),
(5, '齐峰', 555);

-- 3. 查询一下
SELECT id, name, score FROM student ORDER BY id
-- id|name|score|
-- --+----+-----+
-- 1|轩昂 | 651|
-- 2|冷峰 | 555|
-- 3|辰阳 | 543|
-- 4|浩淼 | 456|
-- 5|齐峰 | 555|

-- 4. 按照成绩排名
SELECT
id, name, score,
rank() over win as rk
FROM (
SELECT id, name, score FROM student ORDER BY id
) window win as ( order by score desc)
ORDER BY rk
SETTINGS allow_experimental_window_functions = 1
-- id|name|score|rk|
-- --+----+-----+--+
-- 1|轩昂 | 651| 1|
-- 2|冷峰 | 555| 2|
-- 5|齐峰 | 555| 2|
-- 3|辰阳 | 543| 4|
-- 4|浩淼 | 456| 5|

WITH语法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
-- 2023-01-13...14.02.10 ck 多with语法.txt
WITH target_info AS
(
select
rel_id,
tenant_id,
sum(toDecimal64OrZero(visitParamExtractString(arrayJoin(JSONExtractArrayRaw(cast(fetch_target_json as String))), 'targetValue'), 4)) AS targetValue
from
t_sc_feature_month_target_info
where
visitParamExtractString(arrayJoin(JSONExtractArrayRaw(cast(fetch_target_json as String))), 'featureCode') = 'sales'
and stats_date = '2022-12-01'
and tenant_id = 'lanling'
and rel_type = 'STORE'
and del_flag = 0
and rel_id in (6 ,7 ,8 ,9 ,10 ,11 ,12 ,13 ,14 ,15 ,16 ,17)
group by
rel_id, tenant_id, stats_date
),
store_daily_real_time AS
(
SELECT
sales_amt_actual,
tenant_id,
store_id
FROM
t_ads_store_month_total_stats
where stats_date = '2022-12-07' and tenant_id = 'lanling'
and store_id in (6 ,7 ,8 ,9 ,10 ,11 ,12 ,13 ,14 ,15 ,16 ,17)
)
SELECT
store_id,
targetValue,
sales_amt_actual,
intDivOrZero(toDecimal128(sales_amt_actual,5), toDecimal128(targetValue,5)) sales_achieving_rate
FROM
target_info
right JOIN store_daily_real_time
ON target_info.rel_id = store_daily_real_time.store_id and target_info.tenant_id = store_daily_real_time.tenant_id;