通过Shell数组传参控制Sql脚本串并行调度一例

    xiaoxiao2021-03-25  79

    一、目录环境
    [hadoop@emr-worker-9 UserPortrait]$ pwd
    /home/hadoop/nisj/UserPortrait
    [hadoop@emr-worker-9 UserPortrait]$ ll
    total 616
    -rw-r--r-- 1 hadoop hadoop 2516 Mar 7 18:21 indexCalc-barrage-byUidOnly.sql
    -rw-r--r-- 1 hadoop hadoop 4160 Mar 8 16:38 indexCalc-gift-byUidOnly.sql
    -rw-r--r-- 1 hadoop hadoop 4562 Mar 7 14:29 indexCalc-view-byGameId.sql
    -rw-r--r-- 1 hadoop hadoop 4562 Mar 7 14:29 indexCalc-view-byRoomId.sql
    -rw-r--r-- 1 hadoop hadoop 4529 Mar 7 14:29 indexCalc-view-byTagId.sql
    -rw-r--r-- 1 hadoop hadoop 4411 Mar 7 14:29 indexCalc-view-byUidOnly.sql
    -rw-r--r-- 1 hadoop hadoop 4292 Mar 7 14:29 indexCalc-view-historyBehavior.sql
    -rw------- 1 hadoop hadoop 565692 Mar 8 17:55 nohup.out
    -rwxr-x--- 1 hadoop hadoop 569 Mar 8 16:38 user_portrait_view_moreThread.sh

    二、串并行调度的Shell脚本
    [hadoop@emr-worker-9 UserPortrait]$ cat user_portrait_view_moreThread.sh
    #!/usr/bin/env bash
    #
    # Runs the independent index-calculation Hive scripts in parallel, waits for
    # all of them, then runs the history-behavior script serially afterwards.
    #
    # Exit status: 0 when every Hive invocation succeeded, 1 as soon as the
    # parallel stage or the serial stage reports a failure. The serial stage is
    # skipped when any parallel job failed.

    hive_bin=/usr/lib/hive-current/bin/hive
    sql_dir=/home/hadoop/nisj/UserPortrait

    # $(date) is left unquoted on purpose so the log line format stays exactly
    # what the previous version of this script printed.
    echo $(date),'数据跑批开始...'

    # SQL files launched concurrently, one background job per file.
    indexCalc_sqlFile_array=(
        indexCalc-view-byGameId.sql
        indexCalc-view-byRoomId.sql
        indexCalc-view-byTagId.sql
        indexCalc-view-byUidOnly.sql
        indexCalc-barrage-byUidOnly.sql
        indexCalc-gift-byUidOnly.sql
    )

    # Parallel stage: start every job and remember its PID. The PID list is
    # index-aligned with indexCalc_sqlFile_array.
    pids=()
    for indexCalc_sqlFile in "${indexCalc_sqlFile_array[@]}"
    do
        {
            "$hive_bin" -f "$sql_dir/$indexCalc_sqlFile"
            rc=$?
            sleep 1
            # Propagate the hive status, otherwise the trailing sleep would
            # make every background job look successful.
            exit "$rc"
        } &
        pids+=("$!")
    done

    # A bare `wait` always returns 0, so wait on each PID individually to
    # collect the real exit status of every job.
    failed_files=()
    for i in "${!pids[@]}"
    do
        if ! wait "${pids[$i]}"; then
            failed_files+=("${indexCalc_sqlFile_array[$i]}")
        fi
    done

    if [ "${#failed_files[@]}" -gt 0 ]; then
        echo $(date),"ERROR: hive failed for: ${failed_files[*]}" >&2
        exit 1
    fi

    # Serial stage: only runs after every parallel job succeeded.
    if ! "$hive_bin" -f "$sql_dir/indexCalc-view-historyBehavior.sql"; then
        echo $(date),"ERROR: hive failed for: indexCalc-view-historyBehavior.sql" >&2
        exit 1
    fi
    sleep 1

    echo $(date),'数据跑批完成!'

    三、执行的Sql示例

    /home/hadoop/nisj/UserPortrait/indexCalc-view-byGameId.sql

    -- View-duration metrics per (uid, gameid), rebuilt from recommend_data_view.
    --
    -- Every statement below first collapses recommend_data_view to one row per
    -- (uid, gameid, pt_day) with the summed view_time of that day (rows with
    -- uid <= 0 are dropped). A day counts as an "effective" view when its summed
    -- view_time is >= 5.
    --
    -- Output column names, including the misspelled frist_view_day and
    -- penul_timate_*, are kept unchanged because other jobs may read them.
    -- NOTE(review): next_day and current_date need Hive 1.2 or later - confirm
    -- the cluster version.

    -- Table 1: overall totals.
    -- Total view time and number of viewing days, the same restricted to
    -- effective days, the effective average, and first / newest viewing day.
    -- avg_view_time_effective divides by the effective-day count, which is 0
    -- when a pair has no effective day (presumably NULL in Hive - verify).
    drop table if exists rcd_static_view_bygameid_basic;
    create table rcd_static_view_bygameid_basic as
    with daily_view as (
        select
            uid,
            gameid,
            sum(view_time) as view_time,
            pt_day
        from recommend_data_view
        where uid > 0
        group by uid, gameid, pt_day
    )
    select
        uid,
        gameid,
        sum(view_time) as total_view_time,
        count(view_time) as total_view_times,
        sum(case when view_time >= 5 then view_time else 0 end) as total_view_time_effective,
        sum(case when view_time >= 5 then 1 else 0 end) as total_view_times_effective,
        sum(case when view_time >= 5 then view_time else 0 end)
            / sum(case when view_time >= 5 then 1 else 0 end) as avg_view_time_effective,
        min(pt_day) as frist_view_day,
        max(pt_day) as newest_view_day
    from daily_view
    group by uid, gameid
    ;

    -- Table 2: ranked effective days.
    -- Day and duration of the latest effective view, of the second-to-latest
    -- effective view, and of the single effective day with the longest duration.
    drop table if exists rcd_static_view_bygameid_rank;
    create table rcd_static_view_bygameid_rank as
    with daily_view as (
        select
            uid,
            gameid,
            sum(view_time) as view_time,
            pt_day
        from recommend_data_view
        where uid > 0
        group by uid, gameid, pt_day
    )
    select
        uid,
        gameid,
        max(case when effective_desc_rk = 1 then pt_day end) as tailender_effective_day,
        max(case when effective_desc_rk = 1 then view_time end) as tailender_effective_view_time,
        max(case when effective_desc_rk = 2 then pt_day end) as penul_timate_effective_day,
        max(case when effective_desc_rk = 2 then view_time end) as penul_timate_effective_view_time,
        max(case when effective_desc_rk2 = 1 then pt_day end) as max_effective_view_day,
        max(case when effective_desc_rk2 = 1 then view_time end) as max_effective_view_time
    from (
        select
            uid,
            gameid,
            view_time,
            pt_day,
            -- Recency rank: 1 is the most recent effective day.
            row_number() over (
                partition by uid, gameid
                order by pt_day desc
            ) as effective_desc_rk,
            -- Duration rank: 1 is the longest effective day. pt_day desc is a
            -- tiebreaker so that equal durations always resolve to the same
            -- (most recent) day instead of an arbitrary one.
            row_number() over (
                partition by uid, gameid
                order by view_time desc, pt_day desc
            ) as effective_desc_rk2
        from daily_view
        where view_time >= 5
    ) x1
    group by uid, gameid
    ;

    -- Table 3: effective viewing over the last seven days.
    -- The window is the seven full days before today, today excluded.
    -- current_date replaces from_unixtime(unix_timestamp(), ...) because the
    -- no-argument unix_timestamp() is deprecated and non-deterministic.
    drop table if exists rcd_static_view_bygameid_last7day;
    create table rcd_static_view_bygameid_last7day as
    with daily_view as (
        select
            uid,
            gameid,
            sum(view_time) as view_time,
            pt_day
        from recommend_data_view
        where uid > 0
        group by uid, gameid, pt_day
    )
    select
        uid,
        gameid,
        sum(case when view_time >= 5 then view_time else 0 end) as total_view_time_effective,
        sum(case when view_time >= 5 then 1 else 0 end) as total_view_times_effective,
        sum(case when view_time >= 5 then view_time else 0 end)
            / sum(case when view_time >= 5 then 1 else 0 end) as avg_view_time_effective,
        min(pt_day) as frist_view_day,
        max(pt_day) as newest_view_day
    from daily_view
    where view_time >= 5
      and pt_day between date_sub(current_date, 7) and date_sub(current_date, 1)
    group by uid, gameid
    ;

    -- Table 4: effective viewing per week.
    -- week_no is <ISO week-year>@<ISO week number>. weekofyear() returns the
    -- ISO week, so the year part must be the ISO week-year as well: it is taken
    -- from the Thursday of the same Monday-to-Sunday week. Using year(pt_day)
    -- would label 2017-01-01 as 2017@52 and merge it with late December 2017.
    drop table if exists rcd_static_view_bygameid_byweek;
    create table rcd_static_view_bygameid_byweek as
    with daily_view as (
        select
            uid,
            gameid,
            sum(view_time) as view_time,
            pt_day
        from recommend_data_view
        where uid > 0
        group by uid, gameid, pt_day
    )
    select
        uid,
        gameid,
        concat(year(date_sub(next_day(pt_day, 'MO'), 4)), '@', weekofyear(pt_day)) as week_no,
        sum(case when view_time >= 5 then view_time else 0 end) as total_view_time_effective,
        sum(case when view_time >= 5 then 1 else 0 end) as total_view_times_effective,
        sum(case when view_time >= 5 then view_time else 0 end)
            / sum(case when view_time >= 5 then 1 else 0 end) as avg_view_time_effective,
        min(pt_day) as frist_view_day,
        max(pt_day) as newest_view_day
    from daily_view
    where view_time >= 5
    group by
        uid,
        gameid,
        concat(year(date_sub(next_day(pt_day, 'MO'), 4)), '@', weekofyear(pt_day))
    ;

    -- Table 5: effective viewing per calendar month.
    -- month_no is <year>@<month number>.
    drop table if exists rcd_static_view_bygameid_bymonth;
    create table rcd_static_view_bygameid_bymonth as
    with daily_view as (
        select
            uid,
            gameid,
            sum(view_time) as view_time,
            pt_day
        from recommend_data_view
        where uid > 0
        group by uid, gameid, pt_day
    )
    select
        uid,
        gameid,
        concat(year(pt_day), '@', month(pt_day)) as month_no,
        sum(case when view_time >= 5 then view_time else 0 end) as total_view_time_effective,
        sum(case when view_time >= 5 then 1 else 0 end) as total_view_times_effective,
        sum(case when view_time >= 5 then view_time else 0 end)
            / sum(case when view_time >= 5 then 1 else 0 end) as avg_view_time_effective,
        min(pt_day) as frist_view_day,
        max(pt_day) as newest_view_day
    from daily_view
    where view_time >= 5
    group by
        uid,
        gameid,
        concat(year(pt_day), '@', month(pt_day))
    ;

    转载请注明原文地址: https://ju.6miu.com/read-38892.html

    最新回复(0)