ck使用小技巧

mysql使用小技巧
pg使用小技巧
ck使用小技巧
SQL函数 — ClickHouse官方文档

ck 使用的时候会有很多坑,或者说是使用小技巧(临时先放着)

查询DDL

1
2
-- 2024-03-22 17:09:44
show create table 表名;

表:创建/插入/删除

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
select * from aaa;
drop table aaa;

create table aaa
(
id String comment 'id',
stats_date Date comment '统计日期',
date_created DateTime comment '创建日期'
) engine = MergeTree ORDER BY (id);



-- 新增
insert into aaa (id, stats_date, date_created) values
('001', '2024-02-19', '2024-02-19 08:00:00'),
('002', '2024-02-20', '2024-02-20 09:00:00'),
('003', '2024-02-21', '2024-02-21 10:00:00');

-- 更新
alter table t_ads_scd update statis_dt='2023-07-13' where sale_amt=39; -- 更新带有条件
alter table t_ads_scd update store_name='abc' where 1=1; -- 更新所有

-- 删除
alter table t_ads_scd delete where statis_dt='2023-07-19'; -- 删除

-- ck 添加/修改/删除列 ====> https://blog.csdn.net/moose_killer/article/details/124047279
添加列 alter table amoros_ads_uat.t_ads_org_structure_store_affil add column org_name string;
删除列 alter table amoros_ads_uat.t_ads_org_structure_store_affil drop column org_name;

聚合函数

1
2
3
4
5
6
7
8
9
10
11
12
13
-- 查询字段 可group,也可max/min/any
select tenant_id ... group by tenant_id
select max(tenant_id) ...
select any(tenant_id) ...

-- count 多个参数去重
countDistinctIf(tuple(prod.store_id, prod.product_id), prod.sales_amt_actual>0)

-- 不去重
countIf(tuple(prod.store_id, prod.product_id), prod.sales_amt_actual>0)

-- 符合再相加
sumIf(sales_amt_actual, org_prod_tag.sales_performance = 1)

数值计算

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
-- 四舍五入   保留几位小数   设置默认值
round(toDecimal128(ifNull(toString(sales_amt_actual_sum), '0'), 5)) sales,-- 默认0,四舍五入取整
round(toDecimal128(ifNull(toString(target_value_sum), '0'), 5)) target,
round(intDivOrZero(sales, target), 3) finish_rate-- 四舍五入,保留3位小数 #.#%



-- 除法,被除数可以为 0
-- 防止损失精度 要把分子分母都转为 Decimal 类型,精度 5位小数
SELECT
-- toDecimal128(xxx, 5)
-- toDecimal128(sum(xxx), 5)
-- intDivOrZero(xxx, xxx)
-- intDivOrZero(xxx, toDecimal128(sum(xxx),5))
-- intDivOrZero(toDecimal128(sum(xxx),5), toDecimal128(sum(xxx),5))
sum(retail_tot_amt_actual) sales,
count(retail_id) retail_count,
intDivOrZero(toDecimal128(sales,5), retail_count) atv,
intDivOrZero(toDecimal128(sum(retail_tot_qty),5), retail_count) upt,
intDivOrZero(toDecimal128(sales,5), toDecimal128(sum(retail_tot_amt_list),5)) md

字符串拼接

1
2
-- 两个字符串拼接 => "a b"
SELECT concat('a', ' ', 'b')

字符串替换/分割/一行变多行

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
-- SQL解释:下面 xxx 为占位符
-- position String ===> (288,532.4489361702127) (原始数据)
-- replaceRegexpAll(xxx, '\(|\)', '')) String ===> 288,532.4489361702127 (去除小括号)
-- splitByChar(',', cast(xxx as String))) String 数组 ===> ['288','532.4489361702127'] (字符串分割,转为 String数组 ===> 先转 String 防止报错误:Nested type Array(String) cannot be inside Nullable type)
-- arrayElement(xxx, 1) String ===> 288 (获取数组第1个元素)
-- arrayElement(xxx, 2) String ===> 532.4489361702127 (获取数组第2个元素)
select
-- SQL解释:下面 xxx 为占位符
-- path_region_ids String ===> {3529,3531,3530,3531,3529} (原始数据)
-- replaceRegexpAll(xxx, '{|}', '')) String ===> 3529,3531,3530,3531,3529 (去除大括号)
-- splitByChar(',', xxx)) String 数组 ===> ['3529','3531','3530','3531','3529'] (字符串分割,转为 String数组)
-- arraySlice(xxx, 1, 4) String 数组 ===> ['3529','3531','3530','3531'] (从第1个元素开始,包含第1个元素,数组长度保留4)
-- arrayDistinct(xxx) String 数组 ===> ['3529','3531','3530'] (去重,为了更少的与右表进行关联)
-- arrayJoin(xxx) String 数组变 3 行,元素类型为 String
-- toInt16(xxx) 元素类型 String => int (为了方便join ===> t_store_region_config.region_path_id)
toInt16(
arrayJoin(
arrayDistinct(
arraySlice(
splitByChar(',', replaceRegexpAll(path_region_ids,'{|}','')) as arr_src,-- 原数组 ['3529','3531','3530','3531','3529']
1,4
) as arr_split-- 数组元素,最多4个 ['3529','3531','3530','3531']
) as arr_distinct-- 数组元素去重 ['3529','3531','3530']
) as arr_e_str-- 数组元素 字符串
) as arr_e_int,-- 数组元素 int
arr_split,
sum(activity_count) count
from t_store_region_path
group by arr_split
order by arr_split

json解析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
select * from system.functions where name like '%visitParamExtractString%';
select * from system.functions where name like '%visit%';
-- visitParamExtractInt null=0 否则int

-- 2024-04-18 10:33:00
-- 解析 a.b {"a": {"b": "我是b"}}
-- 说明:目前没找到直接解析 a.b 的函数,只能先解析 a,再解析 b ===> json为''时,返回 ''
visitParamExtractString(visitParamExtractRaw(json, 'a'), 'b')


-- ifNull(fetch_target_json, '[]') -- 字符串默认 '[]'
-- JSONExtractArrayRaw -- 转为json数组
-- arrayJoin(...) AS json -- 一行变多行
-- visitParamExtractString(json, 'featureCode') AS feature_code -- 获取json中的某个key对应值
-- toDecimal128(ifNull(visitParamExtractString(json, 'targetValue'), '0'), 5) AS target_value -- 获取对应值,默认 '0' 再转为 Decimal
-- AND feature_code='sales' -- 过滤
select
store_id,
sum(target_value) target_value_sum
from
(
select
rel_id as store_id,
arrayJoin(
JSONExtractArrayRaw(
ifNull(fetch_target_json, '[]')
)
) as json,
visitParamExtractString(json, 'featureCode') as feature_code,
toDecimal128(ifNull(visitParamExtractString(json, 'targetValue'), '0'), 5) as target_value
from
t_sc_feature_daily_target_info
where
tenant_id = $tenant_id$
and rel_type = 'STORE'
and rel_id in (select store_id from t_ads_store_scd where tenant_id=$tenant_id$ and if_create_sc = 1)
-- AND stats_date BETWEEN '2023-01-01' AND '2023-01-09'
and stats_date between toStartOfMonth(now()) and yesterday()
and feature_code='sales'
)
group by store_id order by store_id

map函数

1
2
3
4
5
6
7
8
9
10
11
12
select * from system.functions where name like '%map%';

-- select toJSONString(tuple('sales_amt_actual', sales_amt_actual, 'pct', pct)) json from source
-- toDecimal128(ifNull(visitParamExtractString(json, 'sales_amt_actual'), '0'), 5)

select visitParamExtractInt(json, 'sales_amt_actual') sales_amt_actual from (
select
toJSONString(map('sales_amt_actual', sales_amt_actual, 'pct', pct, 'age', 18)) json,
-- visitParamExtractInt(json, 'sales_amt_actual') sales_amt_actual,-- Cyclic aliases.
visitParamExtractInt(json, 'age') age
from source
)

日期

日期函数 — ClickHouse官方文档
类型转换 — ClickHouse官方文档
CK时间日期函数 — CSDN

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
-- 相关日期函数(这里能查出来的,官方文档不一定有)
select * from system.functions where name like '%Date%';
-- toDateTime toDateTimeOrNull 后者代替前者,避免toHour报错

-- 字符串 + 小时数
-- and billtime between addHours(today(), 7) and addHours(today(), 12)
and billtime between addHours(toDate($report_date$), 7) and addHours(toDate($report_date$), 12)

-- DateTime 类型 => 字符串"yyyy-MM-dd HH:mm:ss"
select formatDateTime(billtime, '%F %T')
formatDateTime(rdd.billtime, '%H:%M') as billtime,-- 订单时间 格式 "HH:mm"
-- formatDateTime(rdd.billtime, '%F %T') AS billtime,
select toString(billtime)

-- dt Date类型
-- billtime 字符串"HH:mm:ss"
-- dt + billtime => 生成 DateTime 类型
select toDateTime(concat(formatDateTime(dt, '%F'), ' ', billtime))


select now() -- 2022-11-24 14:46:48.000
select today() -- 2022-11-24
select yesterday() -- 2022-11-23
select toStartOfMonth(now()) -- 2022-11-01 当月1号

select toStartOfDay(now()) -- 2022-11-24 00:00:00.000
select toStartOfDay(today()) -- 2022-11-24 00:00:00.000
select toStartOfDay(yesterday()) -- 2022-11-23 00:00:00.000
select toDateTime(now()) -- 2022-11-24 14:51:42.000
select toDateTime(today()) -- 2022-11-24 00:00:00.000
select toDateTime(yesterday()) -- 2022-11-23 00:00:00.000
select formatDateTime(now(), '%F %T') -- 2022-11-24 14:56:03
select formatDateTime(now(), '%F') -- 2022-11-24
select formatDateTime(today(), '%F') -- 2022-11-24
select formatDateTime(yesterday(), '%F') -- 2022-11-23
select toString(now()) -- 2022-11-24 14:59:17
select toString(today()) -- 2022-11-24
select toString(yesterday()) -- 2022-11-23

-- 小时
-- 字符串拼接 'yyyy-MM-dd HH:mm:ss' concat('2023-01-01', ' ', '12:13:14')
-- 'yyyy-MM-dd HH:mm:ss' => DateTime toDateTime('2023-01-01 12:13:14')
-- DateTime => 小时数 Uint8 toHour(toDateTime('2023-01-01 12:13:14'))
select
-- ...
concat('2023-01-01', ' ', first_order_time) billtime,
from t_ads_store_daily_stats
where
-- ...
and toHour(toDateTime(billtime)) between 7 and 10

排名函数

好多人说,为什么ck没有排名函数,其实有的,网上都没有给出相关案例。
window-functions — ClickHouse

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
-- 排名函数
-- 2022-10-26 09:49:21
select
value,
rank() over win as rk -- 1 2 2 4
-- dense_rank() over win as rk -- 1 2 2 3
-- row_number() over win as rk -- 1 2 3 4
from (
select arrayJoin([11, 22, 22, 33]) value
) window win as (order by value desc)
-- ) window win as (partition by store_id order by value desc) 按照店铺分组
order by rk
settings allow_experimental_window_functions = 1
-- value|rk|
-- -----+--+
-- 33| 1|
-- 22| 2|
-- 22| 2|
-- 11| 4|

-- 等同于上面的结果,只是写法不同而已
select
value,
rank() over (order by value) rk
from (
select arrayJoin([11, 22, 22, 33]) value
)
settings allow_experimental_window_functions = 1
--------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------

-- 再举一个例子:按照学生考试成绩排名

-- 1. 创建表
-- drop table student; -- 删除表
create table student
(
`id` UInt32,
`name` String,
`english_score` UInt32,
`math_score` UInt32
)
engine = MergeTree
order by id;

-- 2. 插入5条数据(分数随机) ---- 大牛 二蛋 三驴 四毛 五虎
insert into student values
(1, '大牛', 90, 85),
(2, '二蛋', 85, 100),
(3, '三驴', 80, 85),
(4, '四毛', 80, 95),
(5, '五虎', 75, 90);

-- 3. 查询一下
select * from student;

-- 4. 排名:英语成绩,并排序
select
*,
rank() over win as rk_english
from (
select * from student
) window win as ( order by english_score desc)
order by rk_english
settings allow_experimental_window_functions = 1
;
-- +--+----+-------------+----------+----------+
-- |id|name|english_score|math_score|rk_english|
-- +--+----+-------------+----------+----------+
-- |1 |大牛 |90 |85 |1 |
-- |2 |二蛋 |85 |100 |2 |
-- |3 |三驴 |80 |85 |3 |
-- |4 |四毛 |80 |95 |3 |
-- |5 |五虎 |75 |90 |5 |
-- +--+----+-------------+----------+----------+

-- 5. 排名:英语成绩、数据成绩
select
*,
rank() over win_english as rk_english,
rank() over win_math as rk_math,
''
from (
select * from student
)
window
win_english as ( order by english_score desc),
win_math as ( order by math_score desc)
order by id
settings allow_experimental_window_functions = 1
;
-- +--+----+-------------+----------+----------+-------+--+
-- |id|name|english_score|math_score|rk_english|rk_math|''|
-- +--+----+-------------+----------+----------+-------+--+
-- |1 |大牛 |90 |85 |1 |4 | |
-- |2 |二蛋 |85 |100 |2 |1 | |
-- |3 |三驴 |80 |85 |3 |4 | |
-- |4 |四毛 |80 |95 |3 |2 | |
-- |5 |五虎 |75 |90 |5 |3 | |
-- +--+----+-------------+----------+----------+-------+--+

-- 6. 排名:英语成绩、数据成绩(推荐:效果同上,写法更简洁)
select
*,
rank() over (order by english_score desc) as rk_english,
rank() over (order by math_score desc) as rk_math,
''
from (
select * from student
)
order by id
settings allow_experimental_window_functions = 1
;
-- +--+----+-------------+----------+----------+-------+--+
-- |id|name|english_score|math_score|rk_english|rk_math|''|
-- +--+----+-------------+----------+----------+-------+--+
-- |1 |大牛 |90 |85 |1 |4 | |
-- |2 |二蛋 |85 |100 |2 |1 | |
-- |3 |三驴 |80 |85 |3 |4 | |
-- |4 |四毛 |80 |95 |3 |2 | |
-- |5 |五虎 |75 |90 |5 |3 | |
-- +--+----+-------------+----------+----------+-------+--+

WITH语法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
-- 2023-01-13...14.02.10 ck 多with语法.txt
with target_info AS
(
select
rel_id,
tenant_id,
sum(toDecimal64OrZero(visitParamExtractString(arrayJoin(JSONExtractArrayRaw(cast(fetch_target_json as String))), 'targetValue'), 4)) as targetValue
from
t_sc_feature_month_target_info
where
visitParamExtractString(arrayJoin(JSONExtractArrayRaw(cast(fetch_target_json as String))), 'featureCode') = 'sales'
and stats_date = '2022-12-01'
and tenant_id = 'lanling'
and rel_type = 'STORE'
and del_flag = 0
and rel_id in (6 ,7 ,8 ,9 ,10 ,11 ,12 ,13 ,14 ,15 ,16 ,17)
group by
rel_id, tenant_id, stats_date
),
store_daily_real_time as
(
select
sales_amt_actual,
tenant_id,
store_id
from
t_ads_store_month_total_stats
where stats_date = '2022-12-07' and tenant_id = 'lanling'
and store_id in (6 ,7 ,8 ,9 ,10 ,11 ,12 ,13 ,14 ,15 ,16 ,17)
)
select
store_id,
targetValue,
sales_amt_actual,
intDivOrZero(toDecimal128(sales_amt_actual,5), toDecimal128(targetValue,5)) sales_achieving_rate
from
target_info
right join store_daily_real_time
on target_info.rel_id = store_daily_real_time.store_id and target_info.tenant_id = store_daily_real_time.tenant_id;

未分类

上周一

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
-- 实际使用
-- [上周一, 上周日]
and stats_date
between
addDays(toStartOfWeek(toDate($reportDate$), 1), -7)
and
addDays(toStartOfWeek(toDate($reportDate$), 1), -1)


-- dbeaver
-- 2023-07-26 15:04:22 上周一

-- 参考 CK 日期函数 https://clickhouse.com/docs/zh/sql-reference/functions/date-time-functions

select
toDate('2023-07-26') a1,-- 字符串转 Date
addWeeks(a1, 1) a2,-- Date + 7天
addWeeks(a1, -1) a3,-- Date - 7天
toStartOfWeek(a1, 1) a4,-- Date 所在周的开始 默认0周日为第一天 1周一为第一天
'---',
addDays(toStartOfWeek(toDate('2023-07-26'), 1), 0) bs1,-- 2023-07-24 当前周一
addDays(toStartOfWeek(toDate('2023-07-26'), 1), 6) bs2,-- 2023-07-30 当前周日

addDays(toStartOfWeek(toDate('2023-07-26'), 1), -7) b1,-- 2023-07-17 上周一
addDays(toStartOfWeek(toDate('2023-07-26'), 1), -1) b2,-- 2023-07-23 上周日
'';

平均首单时间

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
-- 实际使用
-- 时间取平均值
-- "HH:mm:ss" => 拼接日期 => 转 DateTime => 时间戳
-- 平均时间戳 => 取整 => 转 DateTime => 取字符串 "HH:mm"
select
store_id, formatDateTime(FROM_UNIXTIME(toInt32(avg(first_order_time))), '%H:%M') avg_first_bill_time
from (
select store_id, toInt32(toDateTime(concat($reportDate$, ' ', first_order_time))) first_order_time

-- dbeaver
-- 2023-07-26 15:30:27
-- 平均首单时间
select
store_id,
avg(first_order_time) t1, -- 有小数 3173=1690325920.5 3164=1690344791.5
round(avg(first_order_time), 0) t2, -- 四舍五入 取整
FROM_UNIXTIME(toInt32(round(avg(first_order_time), 0))) t3,

toInt32(avg(first_order_time)) t22, -- 舍去小数 取整
FROM_UNIXTIME(toInt32(avg(first_order_time))) t33,
formatDateTime(t33, '%H:%M') t44,
-- %F YYYY-MM-DD,相当于%Y-%m-%d
-- %Y 年
-- %m 月份为十进制数(01-12)
-- %d 月中的一天,零填充(01-31)

-- %T HH:MM:SS,相当于%H:%M:%S
-- %H 24小时格式(00-23)
-- %M 分钟(00-59)
-- %S 秒 (00-59)
''
from a2
group by store_id

case when

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
-- 实际使用
case when fb.billtime is null or toString(fb.billtime)='1970-01-01 08:00:00' then '未开单' else formatDateTime(fb.billtime, '%H:%M') end billtime, -- 首单时间 'HH:mm'

-- dbeaver
-- 2023-07-26 17:24:59
-- case when 使用


select
'a' a,
case when a='a' then '1' else '2' end b1,
case when a='aaaa' then '1' else '2' end b2,
case when a='aaaa' then '1' else null end b3,
''


-- 也可以 'case when 条件 then 值 end 别名'

if / multiIf

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
-- if 与 case when
if(a is null, 0, a)
case when a is null then 0 else a end


-- multiIf 与 case when (2024-07-20 15:29:45)
select
1 n,

case
when n=1 then '111'
when n=2 then '222'
else '333'
end n_1,

multiIf(
n=1, '111',
n=2, '222',
'333'
) n_2
;

ifNull 字符串默认值

1
2
3
4
5
-- 默认字符串 0
ifNull(toString(cc.customer_cnt), '0') customer_cnt

-- 默认空数组
ifNull(fetch_target_json, '[]')

左关联默认值

1
2
3
4
5
6
7
8
9
10
-- 默认值显示 0
ifNull(toString(cc.customer_cnt), '0') customer_cnt

-- 左关联为 0
-- 左关联会把 fb.h 设置为0
where fb.h is null or fb.h=0 or cc.h<=fb.h

-- 左关联时间 DateTime '1970-01-01 08:00:00'
-- 左关联会把 fb.billtime 设置为 DateTime '1970-01-01 08:00:00'
case when fb.billtime is null or toString(fb.billtime)='1970-01-01 08:00:00' then '未开单' else formatDateTime(fb.billtime, '%H:%M') end billtime, -- 首单时间 'HH:mm'

自定义函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
select * from system.functions where name like '%Div%'
select * from system.functions where name like 'minus'
select * from system.functions where name like '%visitParamExtractString%';
select * from system.functions where name like '%visit%';
select * from system.functions where name like '%JSON%';
select * from system.functions where name like '%map%';
select * from system.functions where name like '%Array%';
select * from system.functions where name like '%date%';
select * from system.functions where name like '%Date%';

select * from system.functions where name like '%count%';
select * from system.functions where name like '%amoros%';
select * from system.functions where name like 'amorosDiv';

select distinct origin from system.functions;-- System SQLUserDefined
select * from system.functions;-- 查询所有函数
select * from system.functions where origin='System';-- 系统函数
select * from system.functions where origin='SQLUserDefined';-- 自定义函数

CREATE FUNCTION amorosDiv AS (a, b) -> if(b = 0, NULL, intDivOrZero(toDecimal128(a, 5), toDecimal128(b, 5)))
CREATE FUNCTION amorosRound0 AS a -> toDecimal128(round(toDecimal128(a, 5), 0), 0);
CREATE FUNCTION amorosRound1 AS a -> toDecimal128(round(toDecimal128(a, 5), 1), 1);
CREATE FUNCTION amorosRound2 AS a -> toDecimal128(round(toDecimal128(a, 5), 2), 2);
CREATE FUNCTION amorosRound3 AS a -> toDecimal128(round(toDecimal128(a, 5), 3), 3);
-- drop function amorosDiv

集群

1
2
3
4
5
6
7
-- 删除函数 - 集群
DROP FUNCTION amorosDiv ON CLUSTER cluster_single_shard;

-- 创建函数 - 集群
CREATE FUNCTION amorosDiv ON CLUSTER cluster_single_shard AS (a, b) -> if(b = 0, NULL, intDivOrZero(toDecimal128(a, 5), toDecimal128(b, 5)));

需要添加集群参数

分组取第一条

1
2
3
4
5
6
7
8
9
with xxx as (
...
), rk as (
select * from (
select *, row_number() over (partition by store_id order by sales_amt_actual desc) AS rk
from p
SETTINGS allow_experimental_window_functions = 1
) tmp where rk <= 5
)

合并数组/数组拆分

1
2
3
4
5
with l as (
...
), post as (
select user_id, arrayStringConcat(groupUniqArray(post_name), ',') post_name from matomo_user_org group by user_id
)

arrayFilter/arrayMap

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
with tmp as (
select 1 gender, 1 is_tall, 1 is_weight
union all
select 2 gender, 1 is_tall, 0 is_weight
)
select
arrayStringConcat(-- 数组元素连接 ', '
arrayConcat(-- 合并 arr1, arr2
-- arr1
[case when gender = 1 then '男' when gender = 2 then '女' end],

-- arr2
arrayFilter(
x -> x != '',
arrayMap(
(k, v) -> if(k=1, v, ''),
[is_tall, is_weight],
['高', '重']
)
)
),

', '
) tags from tmp;

-- +----------+
-- |tags |
-- +----------+
-- |男, 高, 重|
-- |女, 高 |
-- +---------+

元组tuple

1
2
3
4
5
6
7
8
9
10
11
12
13
-- 2024-03-21 16:18   prompt.yml
select
tupleElement(tuple, 1) no,
tupleElement(tuple, 2) code,
tupleElement(tuple, 3) description,
null tenant_id
from (
select arrayJoin([
tuple(1, 'k1', '吧,\n图表1展'),
tuple(2, 'k2', '以性别为分类,\n图表1展示①男/女/中性类商品的销'),
tuple(99999999, '测试code', '测试description')
]) tuple
) order by no

数组array

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
-- 合并为数组(聚合函数) ===> groupUniqArray、groupUniqArrayIf
groupUniqArray(prod.product_id) sku_list,
groupUniqArrayIf(prod.product_id, prod.sales_amt_actual > 0) sales_sku_list,
groupUniqArrayIf(prod.product_id, prod.inv_amt > 0) inv_sku_list,
groupUniqArrayIf(prod.product_id, prod.is_full_size = 1) full_size_sku_list,
groupUniqArrayIf(prod.product_id, prod.sales_amt_actual > 0 or prod.inv_amt > 0) sales_or_inv_sku_list,

-- 数组合并,并去重
arrayDistinct(arrayFlatten(arrayConcat(groupArray(sales_store_list)))) sales_store_list, -- groupUniqArray(arrayJoin(prod.sales_store_list))
arrayDistinct(arrayFlatten(arrayConcat(groupArray(inv_store_list)))) inv_store_list, -- groupUniqArray(arrayJoin(prod.inv_store_list))
arrayDistinct(arrayFlatten(arrayConcat(groupArray(full_size_store_list)))) full_size_store_list, -- groupUniqArray(arrayJoin(prod.full_size_store_list))
arrayDistinct(arrayFlatten(arrayConcat(groupArray(sales_or_inv_store_list)))) sales_or_inv_store_list, -- groupUniqArray(arrayJoin(prod.sales_or_inv_store_list))
arrayDistinct(arrayFlatten(arrayConcat(groupArray(in_short_store_list)))) in_short_store_list, -- groupUniqArray(arrayJoin(prod.in_short_store_list))

-- 数组长度
length(sales_store_list) sales_store_count,

-- 字符串转数组
select splitByChar(',', replaceRegexpAll('[111,222]', '^\\[|\\]', '')) as arr;
select JSONExtractArrayRaw('[111,222]') as arr;

多字段相加

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
-- null + 0   结果为 null

select
...

-- 0+大货+儿童+潮牌 销售额
0
<if test="$where.brand_code$!=null && $where.brand_code$.contains('1101')">
+if(a is null, 0, a)
</if>
<if test="$where.brand_code$!=null && $where.brand_code$.contains('1102')">
+if(b is null, 0, b)
</if>
<if test="$where.brand_code$!=null && $where.brand_code$.contains('1103')">
+if(c is null, 0, c)
</if>
from xxx

bitmap

2024-07-20 15:28:07

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
-- bitmap Array String 基数
select
[
51, 50, 49, 47, 46, 45, 40, 39, 38, 34, 33, 32, 31, 28, 22, 18, 17, 16, 15, 14, 13, 129, 131, 130, 119, 76, 74, 67, 84, 89, 115, 190139, 48, 42, 41, 37, 36, 35, 30, 29, 27, 26, 25, 24, 23, 20, 19, 126, 123, 121, 118, 116, 103, 100, 99, 88, 82, 79, 78, 55, 56, 77, 65, 63, 57, 54, 323, 190140, 190141, 190142, 190143, 190144, 12, 11, 10, 9, 8, 7, 6, 4, 3, 2, 113, 124, 120, 108, 101, 96, 91, 90, 87, 85, 52, 53, 66, 58, 320, 128, 127, 125, 122, 117, 114, 110, 107, 105, 104, 102, 97, 95, 94, 93, 86, 73, 71, 70, 69, 62, 61, 60, 59, 132, 133, 202, 203, 209, 213, 139, 141, 148, 161, 169, 179, 181, 191, 193, 194, 200, 135, 160, 147, 167, 158, 159, 162, 173, 174, 195, 185, 177, 188, 214, 164, 165, 166, 186, 206, 163, 142, 155, 187, 201, 134, 136, 138, 140, 210, 156, 192, 172, 175, 154, 211, 170, 207, 182, 153, 157, 199, 190, 198, 171, 176, 137, 212, 183, 189, 145, 205, 146, 196, 184, 324, 204, 144, 208, 143, 149, 151, 152, 150, 168, 180, 178, 277, 275, 276, 280, 281, 282, 284, 285, 190149, 267, 268, 269, 270, 271, 272, 273, 274, 278, 279, 283, 286, 287, 190152, 190151, 190153, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 190150, 216, 217, 218, 190007, 228, 223, 225, 221, 226, 220, 224, 227, 222, 321, 231, 256, 234, 236, 233, 229, 238, 255, 232, 235, 230, 190006, 242, 246, 244, 243, 245, 241, 247, 249, 318, 248, 253, 254, 239, 250, 319, 252, 240, 251, 322, 288, 289, 190239, 190250, 293, 292, 291, 190214, 190220, 190225, 190226, 190233, 190244, 190252, 190258, 190260, 190261, 295, 294, 297, 296, 300, 301, 298, 299, 190215, 190223, 190234, 190264, 190263, 302, 303, 304, 190238, 190248, 305, 306, 307, 190224, 190240, 190241, 190242, 190245, 190253, 190256, 308, 309, 310, 190213, 190266, 313, 311, 290, 312, 190221, 190227, 190228, 190229, 190237, 190247, 190249, 190251, 190254, 190265, 316, 317, 314, 315, 190211, 190212, 190216, 190217, 190218, 190219, 190222, 190230, 190236, 190255, 190259, 190231, 190232, 190235, 190243, 190257, 190246, 190262, 192512, 192448, 192547, 192465, 192474, 192491, 192481, 192509, 192519, 192538, 192463, 192522, 192539, 192445, 192484, 192476, 192469, 192529, 192505, 192449, 192454, 192551, 192503, 192518, 192534, 192525, 192495, 192504, 192542, 192530, 192535, 192521, 192500, 192459, 192548, 192541, 192501, 192499, 192508, 192483, 192496, 192489, 192458, 192464, 192528, 192466, 192490, 192540, 192468, 192470, 192467, 192513, 192493, 192478, 192523, 192502, 192497, 192447, 192537, 192461, 192514, 192524, 192462, 192482, 192526, 192549, 192477, 192455, 192520, 192446, 192457, 192507, 192532, 192533, 192516, 192506, 192544, 192479, 192531, 192510, 192480, 192511, 192450, 192488, 192543, 192517, 192460, 192456, 192492, 192494, 192473, 192453, 192545, 192527, 192498, 192451, 192472, 192475, 192515, 192486, 192536, 192471, 192487, 192485, 192546, 192452, 192550, 190127, 190130, 190133, 190128, 190129, 190134, 190193, 191945, 191963, 190189, 190195, 190197, 190199, 190206, 190205, 191941, 191959, 190191, 190187, 190203, 191951, 190188, 190202, 191946, 191950, 191955, 190192, 190198, 190201, 190204, 191957, 191958, 191961, 190190, 191953, 190194, 190196, 190200, 191948, 191949, 191952, 191960, 191962, 191944, 191947, 191954, 191956, 191964, 191943, 191942, 190164, 190165, 190166, 190171, 190172, 190173, 190174, 190176, 190167, 190168, 190169, 190170, 190175, 190177, 192585, 192586, 192587, 192588, 192589, 192590, 192591, 192592, 192593, 192594, 192595, 192596, 192597, 192598, 192599, 192600, 192601, 192602, 192603, 192604, 192605, 192606, 192607, 192608, 192609, 192610, 192717, 192718, 192611, 192612, 192613, 192614, 192615, 192616, 192617, 192618, 192619, 192620, 192621, 192622, 192623, 192624, 192625, 192626, 192627, 192628, 192629, 192630, 192631, 192632, 192719, 192633, 192634, 192635, 192636, 192637, 192638, 192639, 192640, 192641, 192642, 192643, 192644, 192645, 192646, 192647, 192648, 192649, 192650, 192651, 192652, 192701, 192653, 192702, 192654, 192655, 192656, 192657, 192658, 192659, 192703, 192660, 192661, 192662, 192663, 192664, 192665, 192666, 192667, 192704, 192668, 192720, 192669, 192670, 192671, 192672, 192673, 192674, 192675, 192676, 192677, 192678, 192679, 192680, 192681, 192682, 192683, 192684, 192705, 192685, 192686, 192687, 192688, 192689, 192690, 192721, 192691, 192692, 192722, 192693, 192723, 192694, 192695, 192696, 192706, 192697, 192698, 192699, 192724, 192700, 192725, 192730, 192748, 192735, 192739, 192741, 192744, 192749, 192751, 192733, 192745, 192734, 192736, 192740, 192746, 192747, 192731, 192732, 192737, 192738, 192742, 192743, 192750
-- 1,2,3
] a,

-- array -> bitmap
bitmapBuild(a) bitmap,-- 1355字节
-- bitmap -> array
bitmapToArray(bitmap) arr,-- UInt32 * 695 = 4*695 = 2780字节
-- array -> String
toString(arr) arrStr,-- 3868字节
-- 求基数:元素有多少个
bitmapCardinality(bitmap) cardinality,-- 695
-- 类型
toTypeName(bitmap) t_bitmap -- AggregateFunction(groupBitmap, UInt32)
;

-- 交集, 并集, 差集, 异或集
select
bitmapBuild([11,22,33]) b1,
bitmapBuild([22,33,44]) b2,

bitmapToArray(bitmapAnd(b1, b2)) bitmapAnd, -- 交集 [22, 33]
bitmapToArray(bitmapOr(b1, b2)) bitmapOr, -- 并集 [11, 22, 33, 44]
bitmapToArray(bitmapAndnot(b1, b2)) bitmapAndnot,-- 差集(1有2无) [11]
bitmapToArray(bitmapXor(b1, b2)) bitmapXor, -- 异或集(1有2无 或 2有1无) [11, 44]

bitmapAndCardinality(b1, b2) bitmapAndCardinality, -- 交集个数: 2
bitmapOrCardinality(b1, b2) bitmapOrCardinality, -- 并集个数: 4
bitmapAndnotCardinality(b1, b2) bitmapAndnotCardinality,-- 差集个数: 1
bitmapXorCardinality(b1, b2) bitmapXorCardinality, -- 异或集个数:2
''
;