hive分区和分桶的使用

//创建表,external 外部表
$hive>CREATE external TABLE IF NOT EXISTS t2(id int,name string,age int)
COMMENT ‘xx’ ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,’ STORED AS TEXTFILE ;

//查看表数据
$hive>desc t2 ;
$hive>desc formatted t2 ;

//加载数据到hive表
$hive>load data local inpath '/home/centos/customers.txt' into table t2 ;	//local上传文件
$hive>load data inpath '/user/centos/customers.txt' [overwrite] into table t2 ;	//移动文件

//复制表
mysql>create table tt as select * from users ;		//携带数据和表结构
mysql>create table tt like users ;			//不带数据,只有表结构

hive>create table tt as select * from users ;	
hive>create table tt like users ;	


//count()查询要转成mr
$hive>select count(*) from t2 ;
$hive>select id,name from t2 ;


//
$hive>select * from t2 order by id desc ;				//MR
	
//启用/禁用表
$hive>ALTER TABLE t2 ENABLE NO_DROP;	//不允许删除
$hive>ALTER TABLE t2 DISABLE NO_DROP;	//允许删除


//分区表,优化手段之一,从目录的层面控制搜索数据的范围。
//创建分区表.
$hive>CREATE TABLE t3(id int,name string,age int) PARTITIONED BY (Year INT, Month INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;

//显式表的分区信息
$hive>SHOW PARTITIONS t3;

//添加分区,创建目录
$hive>alter table t3 add partition (year=2014, month=12);

//删除分区
hive>ALTER TABLE employee_partitioned DROP IF EXISTS PARTITION (year=2014, month=11);

//分区结构
hive>/user/hive/warehouse/mydb2.db/t3/year=2014/month=11
hive>/user/hive/warehouse/mydb2.db/t3/year=2014/month=12


//加载数据到分区表
hive>load data local inpath '/home/centos/customers.txt' into table t3 partition(year=2014,month=11);

//创建桶表
$hive>CREATE TABLE t4(id int,name string,age int) CLUSTERED BY (id) INTO 3 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;

//加载数据不会进行分桶操作
$hive>load data local inpath '/home/centos/customers.txt' into table t4 ;

//查询t3表数据插入到t4中。
$hive>insert into t4 select id,name,age from t3 ;

//桶表的数量如何设置?
//评估数据量,保证每个桶的数据量block的2倍大小。


//连接查询
$hive>CREATE TABLE customers(id int,name string,age int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
$hive>CREATE TABLE orders(id int,orderno string,price float,cid int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;

//加载数据到表
//内连接查询
hive>select a.*,b.* from customers a , orders b where a.id = b.cid ;
//左外
hive>select a.*,b.* from customers a left outer join orders b on a.id = b.cid ;
hive>select a.*,b.* from customers a right outer join orders b on a.id = b.cid ;
hive>select a.*,b.* from customers a full outer join orders b on a.id = b.cid ;

发表评论:

◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。

微信扫一扫

微信扫一扫

微信扫一扫,分享到朋友圈

hive分区和分桶的使用