EMR SparkSQL完全兼容开源SparkSQL语法,以下对基本的库表操作做一个说明,其他详细指南可以参考开源SparkSQL语法说明。
-- Create a database without an explicit LOCATION clause.
CREATE DATABASE db_demo;
注意
使用location指定库路径时,要确保对应的TOS桶已存在,并且当前用户拥有该桶路径的读写权限。
-- Create a database whose location is an explicit TOS path.
CREATE DATABASE db_demo
LOCATION 'tos://您的tos bucket name/warehouse/';

-- Show the database's metadata.
DESCRIBE DATABASE db_demo;

-- Remove the database.
DROP DATABASE db_demo;
-- Create a two-column table.
CREATE TABLE tb_demo (
    id   INT,
    name STRING
);

-- Show the table's column definitions.
DESCRIBE TABLE tb_demo;

-- Drop the table.
DROP TABLE tb_demo;

-- NOTE(review): the two statements below require tb_demo to exist; they are
-- listed after the DROP above, so they read as standalone examples. Run them
-- before dropping the table.
INSERT INTO tb_demo SELECT 1, 'name1';

SELECT * FROM tb_demo;
上传jar包到TOS桶
创建UDF
-- Register a UDF implemented by a class inside a jar stored on TOS.
-- <schemaName>, <functionName>, <funcClassName> and the jar path are
-- placeholders: substitute real values before running.
CREATE FUNCTION <schemaName>.<functionName>
AS '<funcClassName>'
USING JAR 'tos://您的tos bucket name/您的jar包地址';

-- Call the UDF. The statement is terminated with ';' so that anything that
-- follows it in a script is parsed as a separate statement.
SELECT <schemaName>.<functionName>('aaabbB');
-- Hudi example: point spark_catalog at HoodieCatalog and switch the
-- serializer to Kryo for this session.
SET spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog;
SET spark.serializer=org.apache.spark.serializer.KryoSerializer;

-- Create (if missing) a database located on TOS and switch to it.
CREATE DATABASE IF NOT EXISTS hudi_db
LOCATION 'tos://您的tos bucket name/warehouse/hudi_db/';
USE hudi_db;

-- Create a Hudi table, write one row, read it back, then clean up.
CREATE TABLE IF NOT EXISTS test_hudi_tb (
    id   BIGINT,
    name STRING
) USING hudi;

INSERT INTO test_hudi_tb SELECT 1, 'a1';

SELECT * FROM test_hudi_tb;

DROP TABLE test_hudi_tb;
-- Iceberg example: define a catalog named "iceberg" implemented by Iceberg's
-- SparkCatalog with type hive, and set the store assignment policy to ansi.
SET spark.sql.catalog.iceberg=org.apache.iceberg.spark.SparkCatalog;
SET spark.sql.catalog.iceberg.type=hive;
SET spark.sql.storeAssignmentPolicy=ansi;

-- Switch to the iceberg catalog, then create (if missing) a database located
-- on TOS and switch to it.
USE iceberg;
CREATE DATABASE IF NOT EXISTS iceberg_db
LOCATION 'tos://您的tos bucket name/warehouse/iceberg_db/';
USE iceberg_db;

-- Create an Iceberg table, write one row, read it back, then clean up.
CREATE TABLE IF NOT EXISTS test_iceberg_tb (
    id   BIGINT,
    name STRING
) USING iceberg;

INSERT INTO test_iceberg_tb SELECT 1, 'a1';

SELECT * FROM test_iceberg_tb;

DROP TABLE test_iceberg_tb;
-- Delta example: point spark_catalog at DeltaCatalog for this session.
SET spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog;

-- Create (if missing) a database located on TOS and switch to it.
CREATE DATABASE IF NOT EXISTS delta_db
LOCATION 'tos://您的tos bucket name/warehouse/delta_db/';
USE delta_db;

-- Create a Delta table, write one row, read it back, then clean up.
CREATE TABLE IF NOT EXISTS test_delta_tb (
    id   BIGINT,
    name STRING
) USING delta;

INSERT INTO test_delta_tb SELECT 1, 'a1';

SELECT * FROM test_delta_tb;

DROP TABLE test_delta_tb;
-- Paimon example: define a catalog named "paimon" implemented by Paimon's
-- SparkCatalog, using hive as its metastore.
SET spark.sql.catalog.paimon=org.apache.paimon.spark.SparkCatalog;
SET spark.sql.catalog.paimon.metastore=hive;
-- Custom warehouse path. This comment must sit on its own line: a "--"
-- comment runs to the end of the line, so keeping it inline would comment
-- out the two SET statements that follow it.
SET spark.sql.catalog.paimon.warehouse=tos://您的tos bucket name/warehouse;
SET spark.sql.storeAssignmentPolicy=ansi;

-- Switch to the paimon catalog, then create (if missing) a database located
-- on TOS and switch to it.
USE paimon;
CREATE DATABASE IF NOT EXISTS paimon_db
LOCATION 'tos://您的tos bucket name/warehouse/paimon_db/';
USE paimon_db;

-- Create a Paimon table, write one row, read it back, then clean up.
CREATE TABLE IF NOT EXISTS test_paimon_tb (
    id   BIGINT,
    name STRING
) USING paimon;

INSERT INTO test_paimon_tb SELECT 1, 'a1';

SELECT * FROM test_paimon_tb;

DROP TABLE test_paimon_tb;