1. XML方式配置字典
1. 准备测试数据
生成字典的数据源,这里将字典的数据存储在ClickHouse的t_dic_ch表中:
drop table t_dic_ch;
create table t_dic_ch(id UInt64, code String, name String, age UInt8) ENGINE=TinyLog;
insert into t_dic_ch values(1,'id001','xiaohe',23),(2,'id002','xiaoxue',25),(3,'id003','xiaoyu',26),(4,'id004','xiaoxi',27);
查看表数据:
SELECT * FROM t_dic_ch;
┌─id─┬─code──┬─name────┬─age─┐
│ 1 │ id001 │ xiaohe │ 23 │
│ 2 │ id002 │ xiaoxue │ 25 │
│ 3 │ id003 │ xiaoyu │ 26 │
│ 4 │ id004 │ xiaoxi │ 27 │
└────┴───────┴─────────┴─────┘
2. 准备配置文件
文件名为:file_dictionary.xml, 存放在目录:/etc/clickhouse-server/, 文件内容如下:
<yandex>
<dictionary>
<name>t_dict_person_xml</name>
<database>default</database>
<structure>
<id>
<name>id</name>
</id>
<attribute>
<name>code</name>
<type>String</type>
<null_value/>
</attribute>
<attribute>
<name>name</name>
<type>String</type>
<null_value/>
</attribute>
<attribute>
<name>age</name>
<type>UInt8</type>
<null_value/>
</attribute>
</structure>
<source>
<clickhouse>
<host>localhost</host>
<port>9001</port>
<user>default</user>
<password></password>
<db>default</db>
<table>t_dic_ch</table>
</clickhouse>
</source>
<layout>
<hashed/>
</layout>
<lifetime>10</lifetime>
</dictionary>
</yandex>
3. 字典功能验证
select * from system.dictionaries where name='t_dict_person_xml';\G
Row 1:
──────
database:
name: t_dict_person_xml
status: LOADED
origin: /etc/clickhouse-server/file_dictionary.xml
type: Hashed
key: UInt64
attribute.names: ['code','name','age']
attribute.types: ['String','String','UInt8']
bytes_allocated: 25136
query_count: 1
hit_rate: 1
element_count: 12
load_factor: 0.046875
source: ClickHouse: default.t_dic_ch
lifetime_min: 10
lifetime_max: 10
loading_start_time: 2020-05-25 15:07:20
loading_duration: 0.001
last_exception:
1 rows in set. Elapsed: 0.004 sec.
验证字典的使用:
SELECT dictGetString('t_dict_person_xml', 'name', toUInt64(2)) AS regionName;
┌─regionName─┐
│ 'xiaoxue' │
└────────────┘
2. DDL语句配置字典
DROP DICTIONARY t_dict_person_ddl;
CREATE DICTIONARY t_dict_person_ddl
(
id UInt64,
code String,
name String,
age UInt8
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(
host 'localhost'
port 9001
user 'default'
db 'default'
password ''
table 't_dic_ch'
where 'id>0'
))
LAYOUT(FLAT())
LIFETIME(30);
验证:
select * from system.dictionaries where name='t_dict_person_ddl';\G
Row 1:
──────
database: default
name: t_dict_person_ddl
status: NOT_LOADED
origin: default.t_dict_person_ddl
type:
key:
attribute.names: []
attribute.types: []
bytes_allocated: 0
query_count: 0
hit_rate: 0
element_count: 0
load_factor: 0
source:
lifetime_min: 0
lifetime_max: 0
loading_start_time: 0000-00-00 00:00:00
loading_duration: 0
last_exception:
1 rows in set. Elapsed: 0.004 sec.
SELECT dictGetString('default.t_dict_person_ddl', 'name', toUInt64(2)) AS regionName;
┌─regionName─┐
│ 'xiaoxue' │
└────────────┘
1 rows in set. Elapsed: 0.003 sec.
Note:以下测试均在default数据库。
1. flat/hash/sparse_hash/cache
DROP DICTIONARY t_dict_person_ddl;
CREATE DICTIONARY t_dict_person_ddl
(
id UInt64,
code String,
name String,
age UInt8
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(
host 'localhost'
port 9001
user 'default'
db 'default'
password ''
table 't_dic_ch'
where 'id>0'
))
LAYOUT(CACHE(SIZE_IN_CELLS 10000))
LIFETIME(30);
FLAT()、HASHED()、SPARSE_HASHED()、CACHE(SIZE_IN_CELLS 10000)
SELECT dictGetString('default.t_dict_person_ddl', 'name', toUInt64(2)) AS regionName;
2. complex_key_hashed/complex_key_cache
DROP DICTIONARY t_dict_person_ddl;
CREATE DICTIONARY t_dict_person_ddl
(
id UInt64,
code String,
name String,
age UInt8
)
PRIMARY KEY id,code
SOURCE(CLICKHOUSE(
host 'localhost'
port 9001
user 'default'
db 'default'
password ''
table 't_dic_ch'
where 'id>0'
))
LAYOUT(COMPLEX_KEY_HASHED())
LIFETIME(30);
COMPLEX_KEY_HASHED()、COMPLEX_KEY_CACHE(SIZE_IN_CELLS 10000)
SELECT dictGet('default.t_dict_person_ddl', 'name', tuple(toUInt64(2), 'id002')) AS name;
3. range_hashed
drop table t_hash_range;
create table t_hash_range(id UInt64, start Date, end Date, amount Float32) ENGINE=TinyLog;
insert into t_hash_range values
(123, '2020-03-20', '2020-03-22', 0.15)
(123, '2020-03-23', '2020-03-27', 0.25)
(456, '2020-04-20', '2020-04-30', 0.35)
;
查看数据:
SELECT * FROM t_hash_range;
┌──id─┬──────start─┬────────end─┬─amount─┐
│ 123 │ 2020-03-20 │ 2020-03-22 │ 0.15 │
│ 123 │ 2020-03-23 │ 2020-03-27 │ 0.25 │
│ 456 │ 2020-04-20 │ 2020-04-30 │ 0.35 │
└─────┴────────────┴────────────┴────────┘
创建字典:
DROP DICTIONARY t_dict_hash_range;
CREATE DICTIONARY t_dict_hash_range (
id UInt64,
start Date,
end Date,
amount Float32
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(
host 'localhost'
port 9001
user 'default'
db 'default'
password ''
table 't_hash_range'
))
LAYOUT(RANGE_HASHED())
RANGE(MIN start MAX end)
LIFETIME(30);
查看id为123的记录,在日期2020-03-21日的amount:
select dictGetFloat32('default.t_dict_hash_range', 'amount', toUInt64(123), toDate('2020-03-21')) as amount;
查看id为123的记录,在日期2020-03-25日的amount:
select dictGetFloat32('default.t_dict_hash_range', 'amount', toUInt64(123), toDate('2020-03-25')) as amount;
日期之外的记录:
SELECT dictGetFloat32('default.t_dict_hash_range', 'amount', toUInt64(123), toDate('2020-03-29')) AS amount;
4. ip_tire
创建表和测试数据:
drop table t_ip_tire;
create table t_ip_tire(prefix String, asn UInt32, ccode String) ENGINE=TinyLog;
insert into t_ip_tire values
('202.79.32.0/20', 17501, 'NP')
('2620:0:870::/48', 3856, 'US')
('2a02:6b8:1::/48', 13238, 'RU')
('2001:db8::/32', 65536, 'ZZ')
;
查看数据:
SELECT * FROM t_ip_tire;
┌─prefix──────────┬───asn─┬─ccode─┐
│ 202.79.32.0/20 │ 17501 │ NP │
│ 2620:0:870::/48 │ 3856 │ US │
│ 2a02:6b8:1::/48 │ 13238 │ RU │
│ 2001:db8::/32 │ 65536 │ ZZ │
└─────────────────┴───────┴───────┘
创建字典:
DROP DICTIONARY t_dict_ip_tire;
CREATE DICTIONARY t_dict_ip_tire (
prefix String,
asn UInt32,
ccode String
)
PRIMARY KEY prefix
SOURCE(CLICKHOUSE(
host 'localhost'
port 9001
user 'default'
db 'default'
password ''
table 't_ip_tire'
))
LAYOUT(IP_TRIE())
LIFETIME(30);
检索数据:
select
dictGetUInt32('default.t_dict_ip_tire', 'asn', tuple(IPv4StringToNum('202.79.32.22'))) as asn,
dictGetString('default.t_dict_ip_tire', 'ccode', tuple(IPv4StringToNum('202.79.32.22'))) as ccode
;
Note:以下测试均在default数据库。
1. 文件数据源
TabSeparated格式
文件示例:
准备测试数据
文件命名为person.tsv,存放在目录:/var/lib/clickhouse/user_files,字段之间使用制表符分隔,即格式为TabSeparated。数据如下:
1 'id001' 'xiaohe' 23
2 'id002' 'xiaoxue' 25
3 'id003' 'xiaoyu' 26
4 'id004' 'xiaoxi' 27
创建字典:
DROP DICTIONARY t_dict_person_ddl;
CREATE DICTIONARY t_dict_person_ddl
(
id UInt64,
code String,
name String,
age UInt8
)
PRIMARY KEY id
SOURCE(FILE(path '/var/lib/clickhouse/user_files/person.tsv' format 'TabSeparated'))
LAYOUT(FLAT())
LIFETIME(30);
SELECT dictGetString('default.t_dict_person_ddl', 'name', toUInt64(2)) AS regionName;
2. Executable File
创建字典:
DROP DICTIONARY restricted_dict;
CREATE DICTIONARY restricted_dict (
key UInt64,
value String
)
PRIMARY KEY key
SOURCE(EXECUTABLE(COMMAND 'echo -E "1\thello"' FORMAT TabSeparated))
LIFETIME(MIN 0 MAX 1)
LAYOUT(CACHE(SIZE_IN_CELLS 10));
select dictGetString('default.restricted_dict', 'value', toUInt64(1));
select * from system.dictionaries where name='restricted_dict';\G
SELECT *
FROM system.dictionaries
WHERE name = 'restricted_dict'
Row 1:
──────
database: default
name: restricted_dict
status: FAILED
origin: default.restricted_dict
type:
key:
attribute.names: []
attribute.types: []
bytes_allocated: 0
query_count: 0
hit_rate: 0
element_count: 0
load_factor: 0
source:
lifetime_min: 0
lifetime_max: 0
loading_start_time: 2020-05-25 15:36:55
loading_duration: 0
last_exception: Code: 482, e.displayText() = DB::Exception: Dictionaries with Executable dictionary source is not allowed (version 20.1.4.14 (official build))
1 rows in set. Elapsed: 0.004 sec.
配置文件:
<yandex>
<dictionary>
<name>t_dict_executable</name>
<structure>
<id>
<name>id</name>
</id>
<attribute>
<name>code</name>
<type>String</type>
<null_value/>
</attribute>
<attribute>
<name>name</name>
<type>String</type>
<null_value/>
</attribute>
<attribute>
<name>age</name>
<type>UInt8</type>
<null_value/>
</attribute>
</structure>
<source>
<executable>
<command>cat /var/lib/clickhouse/user_files/person.tsv</command>
<format>TabSeparated</format>
</executable>
</source>
<layout>
<hashed/>
</layout>
<lifetime>10</lifetime>
</dictionary>
</yandex>
select dictGetString('t_dict_executable', 'name', toUInt64(1));
3. MySQL
在mysql数据库创建表并插入数据:
drop table test.test_dc;
create table test.test_dc(
id bigint,
name varchar(100),
age int,
PRIMARY KEY (id)
);
insert into test.test_dc values(1, 'flink', 4);
insert into test.test_dc values(2, 'spark', 6);
insert into test.test_dc values(3, 'clickhouse', 5);
查看MySQL数据:
mysql> select * from test.test_dc;
+----+------------+------+
| id | name | age |
+----+------------+------+
| 1 | flink | 4 |
| 2 | spark | 6 |
| 3 | clickhouse | 5 |
+----+------------+------+
3 rows in set (0.00 sec)
在ClickHouse创建字典:
DROP DICTIONARY mydicmysql;
CREATE DICTIONARY mydicmysql
(
id UInt64,
name String,
age UInt8
)
PRIMARY KEY id
SOURCE(MYSQL(
port 3306
user 'root'
password '123456'
replica(host '127.0.0.1' priority 1)
replica(host '127.0.0.1' priority 1)
db 'test'
table 'test_dc'
invalidate_query 'select age from test_dc where id=3'
))
LAYOUT(FLAT())
LIFETIME(5);
select dictGet('default.mydicmysql', 'name', toUInt64(2)) as name;
4. ClickHouse
参见前面案例。
在mysql数据库创建表并插入数据:
drop table test.test_dc;
create table test.test_dc(
id bigint,
name varchar(100),
age int,
PRIMARY KEY (id)
);
insert into test.test_dc values(1, 'flink', 4);
insert into test.test_dc values(2, 'spark', 6);
insert into test.test_dc values(3, 'clickhouse', 5);
查看MySQL数据:
mysql> select * from test.test_dc;
+----+------------+------+
| id | name | age |
+----+------------+------+
| 1 | flink | 4 |
| 2 | spark | 6 |
| 3 | clickhouse | 5 |
+----+------------+------+
3 rows in set (0.00 sec)
在ClickHouse创建字典:
DROP DICTIONARY mydicmysql;
CREATE DICTIONARY mydicmysql
(
id UInt64,
name String,
age UInt8
)
PRIMARY KEY id
SOURCE(MYSQL(
port 3306
user 'root'
password '123456'
replica(host '127.0.0.1' priority 1)
replica(host '127.0.0.1' priority 1)
db 'test'
table 'test_dc'
invalidate_query 'select age from test.test_dc where id=3'
))
LAYOUT(FLAT())
LIFETIME(MIN 3 MAX 5);
select dictGet('default.mydicmysql', 'name', toUInt64(2)) as name;
更改mysql的age字段生效。
创建表和数据:
drop table t_region;
create table t_region(region_id UInt64, parent_region UInt64, region_name String) ENGINE=TinyLog;
insert into t_region values
(1, 0, 'jiangsu'),(2, 1, 'suzhou'),(3, 2, 'huqiu'),(4, 0, 'anhui'),(5, 4, 'hefei');
查看数据:
SELECT * FROM t_region;
┌─region_id─┬─parent_region─┬─region_name─┐
│ 1 │ 0 │ jiangsu │
│ 2 │ 1 │ suzhou │
│ 3 │ 2 │ huqiu │
│ 4 │ 0 │ anhui │
│ 5 │ 4 │ hefei │
└───────────┴───────────────┴─────────────┘
创建字典, 指定HIERARCHICAL字段:
DROP DICTIONARY t_dict_region;
CREATE DICTIONARY t_dict_region (
region_id UInt64,
parent_region UInt64 HIERARCHICAL,
region_name String
)
PRIMARY KEY region_id
SOURCE(CLICKHOUSE(
host 'localhost'
port 9001
user 'default'
db 'default'
password ''
table 't_region'
))
LAYOUT(HASHED())
LIFETIME(30);
SELECT dictGetString('default.t_dict_region', 'region_name', toUInt64(2)) AS regionName;
┌─regionName─┐
│ suzhou │
└────────────┘
SELECT dictGetHierarchy('default.t_dict_region', toUInt64(3));
┌─dictGetHierarchy('default.t_dict_region', toUInt64(3))─┐
│ [3,2,1] │
└────────────────────────────────────────────────────────┘
全部评论