理论上，ByteHouse 支持的 Python SDK 都可以使用。这里以 clickhouse_connect 为例，说明如何通过 Python 进行 vector search（向量检索）相关操作。
from clickhouse_connect import get_client

# Open a client connection to the ByteHouse server; `client` is reused by the
# table-creation, insert and query steps.
client = get_client(
    host="server",  # server IP or hostname
    # NOTE(review): clickhouse_connect talks to the server over HTTP(S), and
    # 9000 is conventionally the native TCP port -- confirm this is the HTTP
    # port exposed by your deployment.
    port=9000,  # server port
    user="test",  # user name
    password="password",  # password
    compress="zstd",  # compression method; zstd is recommended
    send_receive_timeout=1000,  # send/receive timeout in seconds (not the connect timeout)
)
# Create the table that stores the vectors, with a length constraint and an
# HNSW index on the `embedding` column. `database`, `table`, `dim`, `metric`
# and `engine` must already be defined by the surrounding code.
schema = f"""\
CREATE TABLE IF NOT EXISTS {database}.{table}(
    id UInt64,
    embedding Array(Float32),
    CONSTRAINT cons_vec_len CHECK length(embedding) = {dim},
    INDEX vec_idx embedding TYPE HNSW('METRIC={metric.upper()}, DIM={dim}')
) ENGINE = {engine}
ORDER BY id\
"""
# The DDL is held in the local variable `schema`; there is no `self` in this
# script, so pass the local directly.
client.command(schema)
# embeddings (list[list[float]]): one embedding vector per row
# ids (list[int]): row ids, parallel to `embeddings`
if len(ids) != len(embeddings):
    # zip() would silently drop the unmatched tail and insert fewer rows than
    # the caller expects, so fail loudly instead.
    raise ValueError(
        f"ids and embeddings must have the same length, "
        f"got {len(ids)} and {len(embeddings)}"
    )

# Each row is [id, embedding], matching the column order declared below.
values = [[row_id, embedding] for row_id, embedding in zip(ids, embeddings)]
client.insert(
    f"{database}.{table}",
    values,
    column_names=["id", "embedding"],
    column_type_names=["UInt64", "Array(Float32)"],
)
# query (list[float]): the query vector
# k: number of nearest neighbours to return
# search_param["ef"]: value passed to the `hnsw_ef_s` query setting
ef_search = search_param["ef"]

# Query the same `{database}.{table}` that the table-creation and insert steps
# use. `{metric}Distance` must resolve to a distance function known to the
# server -- NOTE(review): confirm the expected casing of `metric`.
q_str = f"""
SELECT id
FROM {database}.{table}
ORDER BY {metric}Distance(embedding, {query})
LIMIT {k}
settings enable_new_ann=1, hnsw_ef_s={ef_search}
"""
results = client.query(q_str)
# The only selected column is `id`; read it column-wise and normalise to int.
result_ids = [int(row_id) for row_id in results.result_columns[0]]