You need to enable JavaScript to run this app.
导航
基础使用
最近更新时间:2024.05.13 18:56:15
首次发布时间:2024.05.13 18:56:15

1 基础库表操作

EMR SparkSQL完全兼容开源SparkSQL语法,以下对基本的库表操作做一个说明,其他详细指南可以参考开源SparkSQL语法说明。

1.1 数据库操作

  1. 创建数据库
create database db_demo;
  2. 创建数据库,指定自定义TOS桶路径进行存储。

注意

要确保该TOS桶存在,并且当前用户有该桶路径的读写权限。

create database db_demo location 'tos://您的tos bucket name/warehouse/';
  3. 查看数据库信息
desc database db_demo;
  4. 删除数据库
drop database db_demo;

1.2 表操作

  1. 创建表
create table tb_demo(id int, name string);
  2. 描述表信息
desc table tb_demo;
  3. 删除表
drop table tb_demo;
  4. 插入数据
insert into tb_demo select 1,'name1';
  5. 查询表数据
select * from tb_demo;

1.3 UDF操作

  1. 上传jar包

  2. 创建udf

CREATE FUNCTION <schemaName>.<functionName> AS '<funcClassName>' using jar 'tos://您的tos bucket name/您的jar包地址';
  3. 使用udf
select <schemaName>.<functionName>('aaabbB');

2 Spark 访问 Hudi

2.1 配置

set spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog;
set spark.serializer=org.apache.spark.serializer.KryoSerializer;

2.2 基础操作

CREATE DATABASE IF NOT EXISTS hudi_db location 'tos://您的tos bucket name/warehouse/hudi_db/';

use hudi_db;

create table if not exists test_hudi_tb (
    id bigint,
    name string
) using hudi;

insert into test_hudi_tb select 1, 'a1';

select * from test_hudi_tb;

drop table test_hudi_tb;

3 Spark 访问 Iceberg

3.1 配置

set spark.sql.catalog.iceberg=org.apache.iceberg.spark.SparkCatalog;
set spark.sql.catalog.iceberg.type=hive;
set spark.sql.storeAssignmentPolicy=ansi;

3.2 基础操作

use iceberg;

CREATE DATABASE IF NOT EXISTS iceberg_db location 'tos://您的tos bucket name/warehouse/iceberg_db/';

use iceberg_db;

create table if not exists test_iceberg_tb (
    id bigint,
    name string
) using iceberg;

insert into test_iceberg_tb select 1, 'a1';

select * from test_iceberg_tb;

drop table test_iceberg_tb;

4 Spark 访问 DeltaLake

4.1 配置

set spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog;

4.2 基础操作

CREATE DATABASE IF NOT EXISTS delta_db location 'tos://您的tos bucket name/warehouse/delta_db/';

use delta_db;

create table if not exists test_delta_tb (
    id bigint,
    name string
) using delta;

insert into test_delta_tb select 1, 'a1';

select * from test_delta_tb;

drop table test_delta_tb;

5 Spark 访问 Paimon

5.1 配置

set spark.sql.catalog.paimon=org.apache.paimon.spark.SparkCatalog;
set spark.sql.catalog.paimon.metastore=hive;
-- 自定义warehouse路径
set spark.sql.catalog.paimon.warehouse=tos://您的tos bucket name/warehouse;
set spark.sql.storeAssignmentPolicy=ansi;

5.2 基础操作

use paimon;

CREATE DATABASE IF NOT EXISTS paimon_db location 'tos://您的tos bucket name/warehouse/paimon_db/';

use paimon_db;

create table if not exists test_paimon_tb (
    id bigint,
    name string
) using paimon;

insert into test_paimon_tb select 1, 'a1';

select * from test_paimon_tb;

drop table test_paimon_tb;