yarn.log-aggregation-enable = true
设置日志删除时间(7天)
yarn.log-aggregation.retain-seconds = 604800
修改日志目录
yarn.nodemanager.remote-app-log-dir = /logs
•对于小作业开启Uber调优使得任务运行在同一个jvm中,减少时间
•配置mapred-site.xml
开启uber模式
mapreduce.job.ubertask.enable = true
启动uber模式的最大map数,当任务的map数不超过9(小于等于9)时才会启用uber模式
mapreduce.job.ubertask.maxmaps = 9
启动uber模式的最大reduce数
mapreduce.job.ubertask.maxreduces = 1
配置优先级:命令行执行时指定的参数(-D) > etc下的配置 > jar中的配置
start-all.sh启动所有进程(已过时);start-dfs.sh、start-yarn.sh分别启动HDFS和YARN各自的进程
core-site.xml
<configuration> <property> <name>fs.defaultFS</name> <value>hdfs://hadoop:8020</value> </property> <property> <name>hadoop.tmp.dir</name> <value>/home/softwares/hadoop-2.7.1/data/tmp</value> </property> <!-- yarn web ui界面右上角显示的用户名称,自定义为hadoop --> <property> <name>hadoop.http.staticuser.user</name> <value>hadoop</value> </property> </configuration>
hadoop-env.sh
# The java implementation to use.
export JAVA_HOME=/usr/local/program/jdk1.7.0_67
hdfs-site.xml
<configuration> <property> <name>dfs.replication</name> <value>1</value> </property> <!-- 浏览器访问的端口 --> <property> <name>dfs.namenode.secondary.http-address</name> <value>hadoop:50090</value> </property> <!-- namenode web ui的端口 --> <property> <name>dfs.namenode.http-address</name> <value>hadoop:50070</value> </property> <!-- 关闭权限 --> <property> <name>dfs.permissions.enabled</name> <value>false</value> </property> </configuration>
mapred-site.xml
<configuration> <property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> <!-- 开启uber模式(针对小作业的优化) --> <property> <name>mapreduce.job.ubertask.enable</name> <value>true</value> </property> <!-- 启动uber模式的最大map数 --> <property> <name>mapreduce.job.ubertask.maxmaps</name> <value>9</value> </property> <!-- 启动uber模式的最大reduce数 --> <property> <name>mapreduce.job.ubertask.maxreduces</name> <value>1</value> </property> <!-- jobhistory的web端口 --> <property> <name>mapreduce.jobhistory.webapp.address</name> <value>hadoop:19888</value> </property> <!-- jobhistory通信端口 --> <property> <name>mapreduce.jobhistory.address</name> <value>hadoop:10020</value> </property> </configuration>
yarn-site.xml
<configuration> <!-- Site specific YARN configuration properties --> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <!-- 开启日志 --> <property> <name>yarn.log-aggregation-enable</name> <value>true</value> </property> <!-- 日志删除时间(七天) --> <property> <name>yarn.log-aggregation.retain-seconds</name> <value>604800</value> </property> <!-- 修改日志目录 --> <property> <name>yarn.nodemanager.remote-app-log-dir</name> <value>/logs</value> </property> <!-- 配置rm的主机 --> <property> <name>yarn.resourcemanager.hostname</name> <value>hadoop</value> </property> <!-- 配置yarn的web端口--> <property> <name>yarn.resourcemanager.webapp.address</name> <value>hadoop:8088</value> </property> <!-- yarn通信端口 --> <property> <name>yarn.resourcemanager.address</name> <value>hadoop:8032</value> </property> <property> <name>yarn.web-proxy.address</name> <value>hadoop:8888</value> </property> </configuration>