目前开源ClickHouse的物化视图是同步视图,对于使用场景有如下限制:
为了更好的支持多表关联的场景,减少对于数据导入影响,一个比较好的解决方法是使用异步视图,其用途主要包括:
视图类型 | 单表聚合 | 多表关联 | 查询改写 | 刷新策略 | 数据导入影响 |
---|---|---|---|---|---|
异步物化视图 | 是 | 是 | 是 | 异步刷新 | 无 |
同步物化视图 | 是 | 否 | 是 | 导入同步刷新 | 有 |
--建异步视图 CREATE MATERIALIZED VIEW [IF NOT EXISTS] [database.]<mv_name> to [database.]<target_name> [REFRESH [ASYNC [START (start_time)] EVERY (INTERVAL refresh_interval) | MANUAL]] AS <query_statement> properties <mv_properties>; --系统命令,启动,停止视图刷新,激活,失效视图 SYSTEM [START VIEW | STOP VIEW | ACTIVATE VIEW | DEACTIVATE VIEW] [database.]<mv_name>; --手动刷新视图 REFRESH MATERIALIZED VIEW [database.]<mv_name>; --修改视图属性 ALTER TABLE [database.]<mv_name> MODIFY PROPERTY ...;
两表join异步物化视图的建表语句。
---创建数据库 DROP DATABASE IF EXISTS business on cluster test_shard_localhost sync; CREATE DATABASE IF NOT EXISTS business on cluster test_shard_localhost; ---维度表local表 DROP TABLE IF EXISTS business.goods_local on cluster test_shard_localhost sync; CREATE TABLE business.goods_local on cluster test_shard_localhost( create_date DateTime, series_id UInt64, goods_id Int64, item_name String, price Float64 ) ENGINE = HaMergeTree('/clickhouse/test_shard_localhost/business.goods_local/{shard}', '{replica}') PARTITION BY toDate(create_date) ORDER BY (goods_id, item_name) SETTINGS index_granularity = 8192; ---维度表分布式表 DROP TABLE IF EXISTS business.goods on cluster test_shard_localhost sync; CREATE TABLE business.goods on cluster test_shard_localhost ( create_date DateTime, series_id UInt64, goods_id Int64, item_name String, price Float64 ) ENGINE = Distributed('test_shard_localhost', 'business', 'goods_local', intHash64(goods_id)); ---事实表local表 DROP TABLE IF EXISTS business.order_list_local on cluster test_shard_localhost sync; CREATE TABLE business.order_list_local on cluster test_shard_localhost( order_id Int64, client_id Int64, goods_id Int64, shop_id UInt64, order_date DateTime ) ENGINE = HaMergeTree('/clickhouse/test_shard_localhost/business.order_list_local/{shard}', '{replica}') PARTITION BY (toDate(order_date), shop_id) ORDER BY (order_id,goods_id, client_id) SETTINGS index_granularity = 8192; ---事实表分布式表 DROP TABLE IF EXISTS business.order_list on cluster test_shard_localhost sync; CREATE TABLE business.order_list on cluster test_shard_localhost( order_id Int64, client_id Int64, goods_id Int64, shop_id UInt64, order_date DateTime ) ENGINE = Distributed('test_shard_localhost', 'business', 'order_list_local', intHash64(order_id)); --目的表local表 DROP TABLE IF EXISTS business.order_statistics_inner_join_local on cluster test_shard_localhost sync; CREATE TABLE business.order_statistics_inner_join_local on cluster test_shard_localhost (date Date, encoded_shop_id UInt64, encoded_goods_series_id UInt64, item_id Int64, total_price Float64, max_price Float64) ENGINE = HaMergeTree('/clickhouse/test_shard_localhost/business.order_statistics_inner_join_local/{shard}', '{replica}') PARTITION BY (date, toUInt64(encoded_shop_id + 10)) ORDER BY (date, encoded_shop_id, encoded_goods_series_id, item_id) settings index_granularity = 8192; --目的表分布式表 DROP TABLE IF EXISTS business.order_statistics_inner_join on cluster test_shard_localhost sync; CREATE TABLE business.order_statistics_inner_join on cluster test_shard_localhost (date Date, encoded_shop_id UInt64, encoded_goods_series_id UInt64, item_id Int64, total_price Float64, max_price Float64) ENGINE = Distributed('test_shard_localhost', 'business', 'order_statistics_inner_join_local', intHash64(item_id)); --创建异步物化视图 DROP TABLE IF EXISTS business.order_mv_inner_join on cluster test_shard_localhost sync; CREATE MATERIALIZED VIEW business.order_mv_inner_join on cluster test_shard_localhost TO business.order_statistics_inner_join REFRESH ASYNC START('2023-12-05 10:00:00') EVERY(INTERVAL 5 SECOND) AS SELECT date_trunc('month', toDate(order.order_date)) as date, multiIf(order.shop_id >= 2702, toUInt64(30000), shop_id <= 2701, toUInt64(40000), toUInt64(50000)) as encoded_shop_id, multiIf(good.series_id = 1, toUInt64(100), good.series_id = 2, toUInt64(101), good.series_id = 3, toUInt64(103), toUInt64(10)) as encoded_goods_series_id, order.order_id as item_id, sum(good.price) as total_price, max(good.price) as max_price FROM business.order_list as order inner JOIN business.goods as good ON good.goods_id = order.goods_id WHERE date > '2023-04-01' and date <= '2023-12-01' and order.shop_id >= 2702 GROUP BY date, encoded_shop_id, encoded_goods_series_id, item_id; --导入数据 INSERT INTO business.goods VALUES ('2023-11-01 08:32:01', 1, 1001,'apple',2366.5), ('2023-11-02 09:13:15', 1, 1002,'pear',829.0),('2023-11-03 15:06:45', 2, 1003,'potato',2238.2), ('2023-11-03 15:23:45', 3, 1004,'waterlemon',232.2),('2023-12-03 16:13:40', 4, 1108,'keybroad',2345.2); INSERT INTO business.order_list VALUES (10001,201,1001, 2701, '2023-02-16 13:30:01'), (10001,301,1002, 2702, '2023-12-17 20:16:15'), (10002,233,1002, 2703, '2023-06-20 09:42:02'), (10003,453,1003, 2701, '2023-05-23 16:23:34'), (10004,201,2001, 2701, '2023-02-16 13:30:01'), (10005,201,2001, 2701, '2023-02-16 13:30:01'); --命中视图 SELECT date_trunc('month', toDate(order.order_date)) AS date, multiIf(order.shop_id >= 2702, toUInt64(30000), shop_id <= 2701, toUInt64(40000), toUInt64(50000)) AS encoded_shop_id, multiIf(good.series_id = 1, toUInt64(100), good.series_id = 2, toUInt64(101), good.series_id = 3, toUInt64(103), toUInt64(10)) AS encoded_goods_series_id, order.order_id AS item_id, sum(good.price) AS total_price, max(good.price) AS max_price FROM business.order_list AS order INNER JOIN business.goods AS good ON good.goods_id = order.goods_id WHERE (date > '2023-04-01') AND (date <= '2023-12-01') AND (order.shop_id >= 2702) GROUP BY date, encoded_shop_id, encoded_goods_series_id, item_id ORDER BY date settings enable_optimizer=1;