ck使用小技巧

SQL函数 — ClickHouse官方文档

ck 使用的时候会有很多坑,或者说是使用小技巧(临时先放着)

未整理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
-- 两个字符串拼接 => "a b"
SELECT concat('a', ' ', 'b')

-- 除法,被除数可以为 0
-- 防止损失精度 要把分子分母都转为 Decimal 类型,精度 5位小数
SELECT
-- toDecimal128(xxx, 5)
-- toDecimal128(sum(xxx), 5)
-- intDivOrZero(xxx, xxx)
-- intDivOrZero(xxx, toDecimal128(sum(xxx),5))
-- intDivOrZero(toDecimal128(sum(xxx),5), toDecimal128(sum(xxx),5))
sum(retail_tot_amt_actual) sales,
count(retail_id) retail_count,
intDivOrZero(toDecimal128(sales,5), retail_count) atv,
intDivOrZero(toDecimal128(sum(retail_tot_qty),5), retail_count) upt,
intDivOrZero(toDecimal128(sales,5), toDecimal128(sum(retail_tot_amt_list),5)) md


-----------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------

-- SQL解释:下面 xxx 为占位符
-- position String ===> (288,532.4489361702127) (原始数据)
-- replaceRegexpAll(xxx, '\(|\)', '')) String ===> 288,532.4489361702127 (去除小括号)
-- splitByChar(',', cast(xxx as String))) String 数组 ===> ['288','532.4489361702127'] (字符串分割,转为 String数组 ===> 先转 String 防止报错误:Nested type Array(String) cannot be inside Nullable type)
-- arrayElement(xxx, 1) String ===> 288 (获取数组第1个元素)
-- arrayElement(xxx, 2) String ===> 532.4489361702127 (获取数组第2个元素)
SELECT
-- SQL解释:下面 xxx 为占位符
-- path_region_ids String ===> {3529,3531,3530,3531,3529} (原始数据)
-- replaceRegexpAll(xxx, '{|}', '')) String ===> 3529,3531,3530,3531,3529 (去除大括号)
-- splitByChar(',', xxx)) String 数组 ===> ['3529','3531','3530','3531','3529'] (字符串分割,转为 String数组)
-- arraySlice(xxx, 1, 4) String 数组 ===> ['3529','3531','3530','3531'] (从第1个元素开始,包含第1个元素,数组长度保留4)
-- arrayDistinct(xxx) String 数组 ===> ['3529','3531','3530'] (去重,为了更少的与右表进行关联)
-- arrayJoin(xxx) String 数组变 3 行,元素类型为 String
-- toInt16(xxx) 元素类型 String => int (为了方便join ===> t_store_region_config.region_path_id)
toInt16(
arrayJoin(
arrayDistinct(
arraySlice(
splitByChar(',', replaceRegexpAll(path_region_ids,'{|}','')) as arr_src,-- 原数组 ['3529','3531','3530','3531','3529']
1,4
) as arr_split-- 数组元素,最多4个 ['3529','3531','3530','3531']
) as arr_distinct-- 数组元素去重 ['3529','3531','3530']
) as arr_e_str-- 数组元素 字符串
) as arr_e_int,-- 数组元素 int
arr_split,
sum(activity_count) count
FROM t_store_region_path
GROUP BY arr_split
ORDER BY arr_split

日期

日期函数 — ClickHouse官方文档
类型转换 — ClickHouse官方文档
CK时间日期函数 — CSDN

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
-- DateTime 类型 => 字符串"yyyy-MM-dd HH:mm:ss"
SELECT formatDateTime(billtime, '%F %T')
SELECT toString(billtime)

-- dt Date类型
-- billtime 字符串"HH:mm:ss"
-- dt + billtime => 生成 DateTime 类型
SELECT toDateTime(concat(formatDateTime(dt, '%F'), ' ', billtime))


SELECT now() -- 2022-11-24 14:46:48.000
SELECT today() -- 2022-11-24
select yesterday() -- 2022-11-23
select toStartOfDay(now()) -- 2022-11-24 00:00:00.000
select toStartOfDay(today()) -- 2022-11-24 00:00:00.000
select toStartOfDay(yesterday()) -- 2022-11-23 00:00:00.000
select toDateTime(now()) -- 2022-11-24 14:51:42.000
select toDateTime(today()) -- 2022-11-24 00:00:00.000
select toDateTime(yesterday()) -- 2022-11-23 00:00:00.000
select formatDateTime(now(), '%F %T') -- 2022-11-24 14:56:03
select formatDateTime(now(), '%F') -- 2022-11-24
select formatDateTime(today(), '%F') -- 2022-11-24
select formatDateTime(yesterday(), '%F') -- 2022-11-23
select toString(now()) -- 2022-11-24 14:59:17
select toString(today()) -- 2022-11-24
select toString(yesterday()) -- 2022-11-23

排名函数

好多人说,为什么ck没有排名函数,其实有的,网上都没有给出相关案例。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
-- 排名函数
-- 2022-10-26 09:49:21
SELECT
value,
rank() over win as rk
FROM (
SELECT arrayJoin([11, 22, 22, 33]) value
) window win as ( order by value desc)
ORDER BY rk
SETTINGS allow_experimental_window_functions = 1
-- value|rk|
-- -----+--+
-- 33| 1|
-- 22| 2|
-- 22| 2|
-- 11| 4|

--------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------

-- 再举一个例子:按照学生成绩进行排名

-- 1. 创建表
CREATE TABLE student
(
`id` UInt32,
`name` String,
`score` UInt32
)
ENGINE = MergeTree
ORDER BY id

-- 2. 插入5条数据
insert into student values
(1, '轩昂', 651),
(2, '冷峰', 555),
(3, '辰阳', 543),
(4, '浩淼', 456),
(5, '齐峰', 555);

-- 3. 查询一下
SELECT id, name, score FROM student ORDER BY id
-- id|name|score|
-- --+----+-----+
-- 1|轩昂 | 651|
-- 2|冷峰 | 555|
-- 3|辰阳 | 543|
-- 4|浩淼 | 456|
-- 5|齐峰 | 555|

-- 4. 按照成绩排名
SELECT
id, name, score,
rank() over win as rk
FROM (
SELECT id, name, score FROM student ORDER BY id
) window win as ( order by score desc)
ORDER BY rk
SETTINGS allow_experimental_window_functions = 1
-- id|name|score|rk|
-- --+----+-----+--+
-- 1|轩昂 | 651| 1|
-- 2|冷峰 | 555| 2|
-- 5|齐峰 | 555| 2|
-- 3|辰阳 | 543| 4|
-- 4|浩淼 | 456| 5|