# sqoop1,将sqlserver导入hbase
#
# 2017-03-24 20:08
#!/bin/bash
# Import one SQL Server table into HBase via Sqoop, one day per iteration.
# bash (not sh) is required: the loop below uses [[ ]].
#
# NOTE(review): credentials are hard-coded in the JDBC URL — consider moving
# them to a protected Sqoop --options-file instead of the script.
srcConnect="jdbc:sqlserver://10.2.44.181:1433;username=mc_testor;password=0000002am;database=Gree_MonitorCenter_New"

# Source table, split column for Sqoop parallelism, and filter column.
dstTabName=wp04_0013_data
checkColumn="RowKey"
condiColumn="[记录时间]"

# Date range to import; the loop advances startRecoid one day at a time
# until it equals endRecodid.
startRecoid=2017-01-06
plus=1   # unused by the current script; kept for compatibility
endRecodid=2017-01-11
echo "from ${startRecoid} to ${endRecodid}" > log.out

# HBase table-name prefix (a yyyymmdd suffix is appended per iteration).
hbaseTableName=wp04_0013_data_
# HFile bulk-load output directory.
hbaseTablePath=/user/hbase/biz1/wp04_0013_data
# Temporary HDFS directory prefix for Sqoop text output.
# BUG FIX: the rest of the script references ${TemphbasePath}, which was
# never defined — the original defined ${hbasetempPath} instead, so every
# temp path silently collapsed to just the date suffix.
TemphbasePath=/user/hive/biz1/${hbaseTableName}temp
hbasetempPath=${TemphbasePath}   # old name kept for backward compatibility


# SQL to execute. Produces two columns per row:
#   RowKey = reverse(10-digit zero-padded ProjDev_ID)
#            + HHMMSS of [记录时间] (style 108, colons stripped)
#            + literal '7F15' + 6-digit zero-padded Reco_ID
#   c1     = comma-joined payload: timestamp (style 121), Proj_ID,
#            ProjDev_ID, '224', three empty fields, '7F15', Reco_ID,
#            trailing empty field
# NOTE(review): several payload fields are intentionally empty strings —
# confirm the downstream HBase consumer expects these placeholder columns.
sql="select  reverse(right('0000000000'+ltrim(a.[ProjDev_ID]),10))+
replace(CONVERT(varchar(12),a.[记录时间],108),':','')+'7F15'+right('000000'+ltrim([Reco_ID]),6)as RowKey 
,CONVERT(varchar(19),a.[记录时间],121)
+','+ cast (b.Proj_ID as varchar )
+','+  cast (a.[ProjDev_ID] as varchar )
+','+ '224'
+','+ ''
+','+ '' 
+','+ '' 
+','+ '7F15' 
+','+ cast (a.[Reco_ID] as varchar )
+','+'' as c1
from [dbo].[0013_7F15_IDUStateInfo_RSimulateData] a left join Proj_Dev b on a.ProjDev_ID=b.ProjDev_ID "


# Import in daily batches until startRecoid reaches endRecodid.
while [[ "$startRecoid" != "$endRecodid" ]]; do
    # Next day; clamp to endRecodid so the loop always terminates.
    inter=$(date -d "1 day $startRecoid" +%Y-%m-%d)
    end_time_s=$(date -d "$endRecodid" +%s)
    inter_time_s=$(date -d "$inter" +%s)
    if [ "$inter_time_s" -gt "$end_time_s" ]; then
        inter=$endRecodid
    fi

    # Date without dashes, used as the table/path suffix (e.g. 20170106).
    temp_startRecoid=$(echo "$startRecoid" | tr -d "-")
    # Day boundaries for the SQL Server filter (SQL Server datetime ticks
    # end at 23:59:59.997).
    starttime="${startRecoid} 00:00:00:000"
    endtime="${startRecoid} 23:59:59:997"

    # Clean up leftovers from any previous run of this day.
    hadoop fs -rm -r "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
    #hadoop fs -mkdir ${hbaseTablePath} 1>/dev/null 2>&1
    hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}" 1>/dev/null 2>&1

    echo "开始导入hbases数据:++++++++++++++++++++++++++++++++"
    # Extract one day of rows; Sqoop substitutes split predicates for
    # \$CONDITIONS when parallelizing across the 15 mappers.
    sqoop import -D mapreduce.job.queuename=production --connect "${srcConnect}" \
        --query "select t.* from (${sql} where ${condiColumn}>='${starttime}' and ${condiColumn}<='${endtime}' ) t WHERE \$CONDITIONS" \
        --split-by "${checkColumn}" --fields-terminated-by "\t" --lines-terminated-by "\n" \
        --target-dir "${TemphbasePath}${temp_startRecoid}" \
        -m 15

    # Stop entirely if Sqoop produced no output directory.
    if ! hadoop fs -test -e "${TemphbasePath}${temp_startRecoid}"; then
        echo "+++++++++++++++++++++++文件不存在+++++++++++++++++++++++++"
        echo "+++++++++++++++++++++现在截断时间是: $startRecoid +++++++++++++++"
        break
    fi

    # Advance the cursor; ${temp_startRecoid} below still refers to the
    # day that was just extracted.
    startRecoid=$inter

    # Skip (and clean up) empty extracts.
    partf1=$(hadoop fs -du -s "${TemphbasePath}${temp_startRecoid}" | awk '{print $1 }')
    if [ "$partf1" == "0" ]; then
        hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}" 1>/dev/null 2>&1
        echo "---------------file is null partf1 :$partf1-----------------------"
        continue
    fi

    # Convert the text files into HFiles for bulk loading.
    # NOTE(review): the separator is set to a single space while Sqoop wrote
    # tab-separated fields above — confirm this mismatch is intentional.
    hbase org.apache.hadoop.hbase.mapreduce.ImportTsv '-Dimporttsv.separator= ' -Dimporttsv.columns='HBASE_ROW_KEY,f1:c1' -Dimporttsv.bulk.output="${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}" "biz1:${hbaseTableName}${temp_startRecoid}" "${TemphbasePath}${temp_startRecoid}"

    echo "此时的查询时间值为:+++++++++++++++++++++++++++++${temp_startRecoid}"
    echo "此时的hbase存放目录值为:+++++++++++++++++++++++++++++${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
    echo "此时的表名为:+++++++++++++++++++++++++++++${hbaseTableName}${temp_startRecoid}"
    echo "表临时文件目录为:+++++++++++++++++++++++++++++${TemphbasePath}${temp_startRecoid}"

    # ps:
    # f1 is the column family; HBASE_ROW_KEY must be specified.
    # -Dimporttsv.bulk.output sets the HFile output directory.
    # ImportTsv itself does not create the target table.

    # 'hadoop dfs' is deprecated; 'hadoop fs' is the supported form.
    hadoop fs -chmod -R 777 "${hbaseTablePath}"
    # Bulk-load the HFiles into HBase (this step can create the table).
    hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}" "biz1:${hbaseTableName}${temp_startRecoid}"

    hadoop fs -rm -r "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
    hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}" 1>/dev/null 2>&1

    echo "-------------循环完成---------------------------"
done


echo "load Data all over\n" >> log.out