Hive SQL汇总 - 行业资讯 - 肥雀云_南京肥雀信息技术有限公司

) ([^]) ([^]) ([0 - 9]) ([^] *)“
)存储为文本文件;

分区表(相当于表的子目录)

create table 表名 (name string) partitioned by (分区键 类型, …)

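create external table employees (
name string,
salary float,
subordinates array<string>,
deductions map<string,float>,
address struct<street:string,city:string,state:string,zip:int>
)
partitioned by (dt string, type string)
row format delimited fields terminated by '\t'
collection items terminated by ','
map keys terminated by ':'
lines terminated by '\n'
stored as textfile
location '/data/';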
数据格式:
wang	123	a1,a2,a3	k1:1,k2:2,k3:3	s1,s2,s3,4
（字段之间以制表符 \t 分隔，集合元素以 ',' 分隔，map 的键值以 ':' 分隔）

查看分区:
show partitions employees;
增加分区

alter table employees add if not exists partition (country='xxx'[, state='yyyy']);
删除分区
alter table employees drop if exists partition (country='xxx'[, state='yyyy']);

动态分区:
1. 不需要为不同的分区编写不同的插入语句
2. 分区不确定，需要从数据中获取
参数（使用动态分区时，前两个参数必须开启）：
-- Session settings for dynamic partitioning. The first two must be set
-- before running a dynamic-partition insert.

-- Enable dynamic partitioning.
set hive.exec.dynamic.partition=true;
-- Unrestricted mode. Under "strict", at least one static partition is
-- required and it must come first in the partition spec.
-- (Fixed: the value was misspelled "nonstrick".)
set hive.exec.dynamic.partition.mode=nonstrict;
-- Maximum number of dynamic partitions each node may create.
set hive.exec.max.dynamic.partitions.pernode=10000;
-- Maximum number of dynamic partitions a single SQL statement may create.
set hive.exec.max.dynamic.partitions=100000;
-- Maximum number of files a single job may create.
set hive.exec.max.created.files=150000;
-- Upper limit on the number of files open at one time.
-- NOTE(review): 8182 may be a typo for 8192 — confirm the intended value.
set dfs.datanode.max.xcievers=8182;
创建分区表
-- Text-format table with one data column (name), partitioned by the
-- string column `value`. Fields are tab-delimited, rows newline-delimited.
create table d_part (name string)
partitioned by (value string)
row format delimited
    fields terminated by '\t'
    lines terminated by '\n'
stored as textfile;
插入动态分区
-- Overwrite d_part from testtext. The partition spec names `value` without
-- a static value; addr is selected under the alias `value` to supply it.
insert overwrite table d_part partition (value)
select
    name,
    addr as value
from
    testtext;

分桶
-- Turn on bucketing enforcement for this session before loading bucketed tables.
-- NOTE(review): presumably this makes inserts honor the table's bucket
-- definition; confirm the property still exists in your Hive version.
set hive.enforce.bucketing=true;

按id分桶
-- Text-format table of (id, name), clustered by id into 4 buckets with
-- rows sorted by name. Fields are tab-delimited, rows newline-delimited.
create table bucketed_user (
    id   string,
    name string
)
clustered by (id)
sorted by (name)
into 4 buckets
row format delimited
    fields terminated by '\t'
    lines terminated by '\n'
stored as textfile;
分桶抽样
-- Sample bucket 1 out of 2 from bucketed_user.
-- Repaired: the statement was cut off by stray HTML (`alt="..."&gt;`), leaving
-- the parenthesis unclosed and the statement unterminated.
-- NOTE(review): `on id` is reconstructed to match the table's
-- `clustered by (id)` column — confirm against the original article.
select * from bucketed_user tablesample(bucket 1 out of 2 on id);