在 Apache Spark SQL 中,DDL(Data Definition Language)语句用于定义和修改数据库的结构。以下是一些 Spark SQL DDL 语句的用法示例,包括执行语句和相应的执行结果:
创建一个新的数据库。
CREATE { DATABASE | SCHEMA } [ IF NOT EXISTS ] database_name [ COMMENT database_comment ] [ LOCATION database_directory ] [ WITH DBPROPERTIES ( property_name = property_value [ , ... ] ) ]
CREATE DATABASE IF NOT EXISTS my_database;
切换当前使用的数据库。
USE database_name
USE my_database;
修改数据库的属性。
ALTER { DATABASE | SCHEMA } database_nameSET DBPROPERTIES ( property_name = property_value [ , ... ] )
ALTER DATABASE my_database SET DBPROPERTIES ('property_name'='property_value');
删除一个数据库。
DROP { DATABASE | SCHEMA } [ IF EXISTS ] dbname [ RESTRICT | CASCADE ]
DROP DATABASE IF EXISTS my_database CASCADE;
创建一个新的表。
CREATE TABLE [ IF NOT EXISTS ] table_identifier[ ( col_name1 col_type1 [ COMMENT col_comment1 ], ... ) ] USING data_source [ OPTIONS ( key1=val1, key2=val2, ... ) ] [ PARTITIONED BY ( col_name1, col_name2, ... ) ] [ CLUSTERED BY ( col_name3, col_name4, ... ) [ SORTED BY ( col_name [ ASC | DESC ], ... ) ] INTO num_buckets BUCKETS ] [ LOCATION path ] [ COMMENT table_comment ] [ TBLPROPERTIES ( key1=val1, key2=val2, ... ) ][ AS select_statement ]
CREATE [ EXTERNAL ] TABLE [ IF NOT EXISTS ] table_identifier [ ( col_name1[:] col_type1 [ COMMENT col_comment1 ], ... ) ] [ COMMENT table_comment ] [ PARTITIONED BY ( col_name2[:] col_type2 [ COMMENT col_comment2 ], ... ) | ( col_name1, col_name2, ... ) ] [ CLUSTERED BY ( col_name1, col_name2, ...) [ SORTED BY ( col_name1 [ ASC | DESC ], col_name2 [ ASC | DESC ], ... ) ] INTO num_buckets BUCKETS ] [ ROW FORMAT row_format ] [ STORED AS file_format ] [ LOCATION path ] [ TBLPROPERTIES ( key1=val1, key2=val2, ... ) ] [ AS select_statement ] row_format: : SERDE serde_class [ WITH SERDEPROPERTIES (k1=v1, k2=v2, ... ) ] | DELIMITED [ FIELDS TERMINATED BY fields_terminated_char [ ESCAPED BY escaped_char ] ] [ COLLECTION ITEMS TERMINATED BY collection_items_terminated_char ] [ MAP KEYS TERMINATED BY map_key_terminated_char ] [ LINES TERMINATED BY row_terminated_char ] [ NULL DEFINED AS null_char ]
CREATE TABLE [IF NOT EXISTS] table_identifier LIKE source_table_identifier USING data_source[ ROW FORMAT row_format ] [ STORED AS file_format ] [ TBLPROPERTIES ( key1=val1, key2=val2, ... ) ] [ LOCATION path ]
CREATE TABLE [IF NOT EXISTS] my_database.my_table ( id INT, name STRING, value DOUBLE );
IF NOT EXISTS
且表已经存在时,则报错提示表已经存在。修改表的结构,例如添加或替换列。
ALTER TABLE table_identifier RENAME TO table_identifier ALTER TABLE table_identifier partition_spec RENAME TO partition_spec
ALTER TABLE my_database.my_table ADD COLUMNS (new_col1 STRING, new_col2 INT);
删除表中的所有数据。
TRUNCATE TABLE table_identifier [ partition_spec ]
TRUNCATE TABLE my_database.my_table;
修复损坏的表。
MSCK REPAIR TABLE table_identifier
REPAIR TABLE my_database.my_table;
删除一个表。
DROP TABLE [ IF EXISTS ] table_identifier
DROP TABLE IF EXISTS my_database.my_table;
创建一个新的视图。
CREATE [ OR REPLACE ] [ [ GLOBAL ] TEMPORARY ] VIEW [ IF NOT EXISTS ] view_identifiercreate_view_clauses AS query
CREATE VIEW [IF NOT EXISTS] my_database.my_view AS SELECT name, SUM(value) AS total_value FROM my_database.my_table GROUP BY name;
IF NOT EXISTS
且视图已经存在时,则报错提示视图已经存在。修改视图的定义。
ALTER VIEW view_identifier RENAME TO view_identifier
ALTER VIEW my_database.my_view AS SELECT name, SUM(value) AS total_value, id FROM my_database.my_table GROUP BY name, id;
删除一个视图。
DROP VIEW [ IF EXISTS ] view_identifier
DROP VIEW IF EXISTS my_database.my_view;
创建一个自定义的 Scala 或 Java 函数。
CREATE [ OR REPLACE ] [ TEMPORARY ] FUNCTION [ IF NOT EXISTS ]function_name AS class_name [ resource_locations ]
CREATE FUNCTION [IF NOT EXISTS] my_database.my_function AS 'full.class.name.of.your.UDAF';
IF NOT EXISTS
且函数已经存在时,则报错提示函数已经存在。删除一个自定义函数。
DROP [ TEMPORARY ] FUNCTION [ IF EXISTS ] function_name
DROP FUNCTION IF EXISTS my_database.my_function;