本文共 1567 字,大约阅读时间需要 5 分钟。
【hive建表】
-- Raw character login/logout log table, one row per event.
-- Partitioned by year/month/day (all strings, taken from the source file name);
-- source files are '|'-delimited text loaded via LOAD DATA (see shell script below).
-- IF NOT EXISTS makes re-running this DDL idempotent.
CREATE TABLE IF NOT EXISTS CharacterLogin (
    AppID             string,
    GameID            string,
    ChildId           string,
    IP                string,
    ServerID          string,
    AccountID         string,   -- distinct-counted per day by the stats script
    CharacterID       string,
    LogType           string,
    LogTime           int,
    PlatformChannelId string,
    IsLogin           int,
    OnlineTime        int,
    Level             int,
    VIPLevel          int
)
PARTITIONED BY (year string, month string, day string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|';
将数据按时间分区导入 Hive,分区格式为 年/月/日(将文件用 Xshell 上传到 Linux 的过程略)
shell脚本如下
#!/usr/bin/env bash
# Load every daily data file under $path into the partitioned Hive table
# bigtrouble.CharacterLogin. File names are expected to look like
# YYYY-MM-DD.txt; the three dash-separated parts become the
# (year, month, day) partition values.
path="/home/li/Desktop/CharacterLogin"

# Glob instead of `ls` so filenames with unusual characters are handled safely.
for file in "$path"/*.txt; do
    filename=$(basename "$file")
    filename=${filename%.*}                              # strip the .txt extension
    var1=$(echo "$filename" | awk -F '-' '{print $1}')   # year
    var2=$(echo "$filename" | awk -F '-' '{print $2}')   # month
    var3=$(echo "$filename" | awk -F '-' '{print $3}')   # day

    "$HIVE_HOME/bin/hive" -e "load data local inpath '$path/$filename.txt' into table bigtrouble.CharacterLogin partition (year='$var1',month='$var2',day='$var3');"
done
【查询思路】
新建一个 Hive 数据库作为统计库,在其中新建一张统计结果表,表中包含两个字段:date 和 count
查询已分区的角色登录登出表,根据用户ID去重,统计单张表的ID总数,取单表的日期和ID总数插入统计表
shell脚本如下
#!/usr/bin/env bash
# For each daily partition of bigtrouble.CharacterLogin, count distinct
# AccountIDs and insert (date, count) into the summary table day01.count1.
path="/home/li/Desktop/CharacterLogin"

for file in "$path"/*.txt; do
    filename=$(basename "$file")
    filename=${filename%.*}                              # strip the .txt extension
    var1=$(echo "$filename" | awk -F '-' '{print $1}')   # year
    var2=$(echo "$filename" | awk -F '-' '{print $2}')   # month
    var3=$(echo "$filename" | awk -F '-' '{print $3}')   # day

    # hive.cli.print.header must be set in the SAME hive invocation as the
    # query — setting it in a separate `hive -e` afterwards (as the original
    # did) has no effect on the captured result. -S suppresses log noise so
    # $count holds only the number.
    count=$("$HIVE_HOME/bin/hive" -S --hiveconf hive.cli.print.header=false -e \
        "select count(distinct AccountID) from bigtrouble.CharacterLogin where year='$var1' and month='$var2' and day='$var3';")

    "$HIVE_HOME/bin/hive" -e \
        "insert into table day01.count1 values('$var1/$var2/$var3','$count');"
done
【坑】
注意把 HiveQL 语句执行的结果赋值给 shell 变量时,命令替换用的是反引号 `命令` (或更推荐的 $(命令) 写法),而不是单引号
转载地址:http://ssazi.baihongyu.com/