目前开源ClickHouse的物化视图是同步视图,对于使用场景有如下限制:
为了更好的支持多表关联的场景,减少对于数据导入影响,一个比较好的解决方法是使用异步视图,其用途主要包括:
视图类型 | 单表聚合 | 多表关联 | 查询改写 | 刷新策略 | 数据导入影响 |
---|---|---|---|---|---|
异步物化视图 | 是 | 是 | 是 | 异步刷新 | 无 |
同步物化视图 | 是 | 否 | 是 | 导入同步刷新 | 有 |
--建异步视图
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [database.]<mv_name> to [database.]<target_name> [REFRESH [ASYNC [START (start_time)] EVERY (INTERVAL refresh_interval) | MANUAL]] AS <query_statement> properties <mv_properties>;
--系统命令,启动,停止视图刷新,激活,失效视图
SYSTEM [START VIEW | STOP VIEW | ACTIVATE VIEW | DEACTIVATE VIEW] [database.]<mv_name>;
--手动刷新视图
REFRESH MATERIALIZED VIEW [database.]<mv_name>;
--修改视图属性
ALTER TABLE [database.]<mv_name> MODIFY PROPERTY ...;
两表join异步物化视图的建表语句。
---创建数据库
DROP DATABASE IF EXISTS business on cluster test_shard_localhost sync;
CREATE DATABASE IF NOT EXISTS business on cluster test_shard_localhost;
---维度表local表
DROP TABLE IF EXISTS business.goods_local on cluster test_shard_localhost sync;
CREATE TABLE business.goods_local on cluster test_shard_localhost(
create_date DateTime,
series_id UInt64,
goods_id Int64,
item_name String,
price Float64
) ENGINE = HaMergeTree('/clickhouse/test_shard_localhost/business.goods_local/{shard}', '{replica}') PARTITION BY toDate(create_date) ORDER BY (goods_id, item_name) SETTINGS index_granularity = 8192;
---维度表分布式表
DROP TABLE IF EXISTS business.goods on cluster test_shard_localhost sync;
CREATE TABLE business.goods on cluster test_shard_localhost (
create_date DateTime,
series_id UInt64,
goods_id Int64,
item_name String,
price Float64
) ENGINE = Distributed('test_shard_localhost', 'business', 'goods_local', intHash64(goods_id));
---事实表local表
DROP TABLE IF EXISTS business.order_list_local on cluster test_shard_localhost sync;
CREATE TABLE business.order_list_local on cluster test_shard_localhost(
order_id Int64,
client_id Int64,
goods_id Int64,
shop_id UInt64,
order_date DateTime
) ENGINE = HaMergeTree('/clickhouse/test_shard_localhost/business.order_list_local/{shard}', '{replica}') PARTITION BY (toDate(order_date), shop_id) ORDER BY (order_id,goods_id, client_id) SETTINGS index_granularity = 8192;
---事实表分布式表
DROP TABLE IF EXISTS business.order_list on cluster test_shard_localhost sync;
CREATE TABLE business.order_list on cluster test_shard_localhost(
order_id Int64,
client_id Int64,
goods_id Int64,
shop_id UInt64,
order_date DateTime
) ENGINE = Distributed('test_shard_localhost', 'business', 'order_list_local', intHash64(order_id));
--目的表local表
DROP TABLE IF EXISTS business.order_statistics_inner_join_local on cluster test_shard_localhost sync;
CREATE TABLE business.order_statistics_inner_join_local on cluster test_shard_localhost (date Date, encoded_shop_id UInt64, encoded_goods_series_id UInt64, item_id Int64, total_price Float64, max_price Float64) ENGINE = HaMergeTree('/clickhouse/test_shard_localhost/business.order_statistics_inner_join_local/{shard}', '{replica}') PARTITION BY (date, toUInt64(encoded_shop_id + 10)) ORDER BY (date, encoded_shop_id, encoded_goods_series_id, item_id) settings index_granularity = 8192;
--目的表分布式表
DROP TABLE IF EXISTS business.order_statistics_inner_join on cluster test_shard_localhost sync;
CREATE TABLE business.order_statistics_inner_join on cluster test_shard_localhost (date Date, encoded_shop_id UInt64, encoded_goods_series_id UInt64, item_id Int64, total_price Float64, max_price Float64) ENGINE = Distributed('test_shard_localhost', 'business', 'order_statistics_inner_join_local', intHash64(item_id));
--创建异步物化视图
DROP TABLE IF EXISTS business.order_mv_inner_join on cluster test_shard_localhost sync;
CREATE MATERIALIZED VIEW business.order_mv_inner_join on cluster test_shard_localhost TO business.order_statistics_inner_join
REFRESH ASYNC START('2023-12-05 10:00:00') EVERY(INTERVAL 5 SECOND)
AS SELECT
date_trunc('month', toDate(order.order_date)) as date,
multiIf(order.shop_id >= 2702, toUInt64(30000), shop_id <= 2701, toUInt64(40000), toUInt64(50000)) as encoded_shop_id,
multiIf(good.series_id = 1, toUInt64(100), good.series_id = 2, toUInt64(101), good.series_id = 3, toUInt64(103), toUInt64(10)) as encoded_goods_series_id,
order.order_id as item_id,
sum(good.price) as total_price,
max(good.price) as max_price
FROM business.order_list as order inner JOIN business.goods as good ON good.goods_id = order.goods_id WHERE date > '2023-04-01' and date <= '2023-12-01' and order.shop_id >= 2702 GROUP BY date, encoded_shop_id, encoded_goods_series_id, item_id;
--导入数据
INSERT INTO business.goods
VALUES ('2023-11-01 08:32:01', 1, 1001,'apple',2366.5), ('2023-11-02 09:13:15', 1, 1002,'pear',829.0),('2023-11-03 15:06:45', 2, 1003,'potato',2238.2), ('2023-11-03 15:23:45', 3, 1004,'waterlemon',232.2),('2023-12-03 16:13:40', 4, 1108,'keybroad',2345.2);
INSERT INTO business.order_list
VALUES
(10001,201,1001, 2701, '2023-02-16 13:30:01'),
(10001,301,1002, 2702, '2023-12-17 20:16:15'),
(10002,233,1002, 2703, '2023-06-20 09:42:02'),
(10003,453,1003, 2701, '2023-05-23 16:23:34'),
(10004,201,2001, 2701, '2023-02-16 13:30:01'),
(10005,201,2001, 2701, '2023-02-16 13:30:01');
--命中视图
SELECT
date_trunc('month', toDate(order.order_date)) AS date,
multiIf(order.shop_id >= 2702, toUInt64(30000), shop_id <= 2701, toUInt64(40000), toUInt64(50000)) AS encoded_shop_id,
multiIf(good.series_id = 1, toUInt64(100), good.series_id = 2, toUInt64(101), good.series_id = 3, toUInt64(103), toUInt64(10)) AS encoded_goods_series_id,
order.order_id AS item_id,
sum(good.price) AS total_price,
max(good.price) AS max_price
FROM business.order_list AS order
INNER JOIN business.goods AS good ON good.goods_id = order.goods_id
WHERE (date > '2023-04-01') AND (date <= '2023-12-01') AND (order.shop_id >= 2702)
GROUP BY
date,
encoded_shop_id,
encoded_goods_series_id,
item_id ORDER BY date settings enable_optimizer=1;