Hadoop+Hbase+Spark集群配置—Hadoop HA安装

    xiaoxiao2021-03-25  129

    

    解压、改名 tar -zxvf hadoop-2.6.0.tar.gz mv hadoop-2.6.0 hadoop 验证hadoop安装成功 hadoop version

    修改hadoop配置文件 [1] vi /usr/hadoop/hadoop/etc/hadoop/core-site.xml ############################################### <?xml version="1.0" encoding="UTF-8"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <configuration>   <!-- 开启垃圾箱功能,1440分钟 -->   <property>     <name>fs.trash.interval</name>     <value>1440</value>   </property>

      <!-- 指定hdfs的nameservice为gagcluster,是NameNode的URI。hdfs://主机名:端口/ -->   <property>     <name>fs.defaultFS</name>     <value>hdfs://gagcluster:8020</value>   </property>

      <property>     <name>io.file.buffer.size</name>     <value>131072</value>   </property>

      <!-- 指定hadoop临时目录 -->   <property>     <name>hadoop.tmp.dir</name>     <value>/usr/hadoop/storage/hadoop/tmp</value>   </property>

      <!--允许root用户从任意主机(IP)代理其他用户进行访问-->   <property>     <name>hadoop.proxyuser.root.hosts</name>     <value>*</value>   </property>

      <!--允许root用户代理任意用户组中的用户-->   <property>     <name>hadoop.proxyuser.root.groups</name>     <value>*</value>   </property>

      <!-- 指定zookeeper地址 -->   <property>     <name>ha.zookeeper.quorum</name>     <value>SA01:2181,SA02:2181,SA03:2181</value>   </property> </configuration> #################################################

    [2] vi /usr/hadoop/hadoop/etc/hadoop/hdfs-site.xml ################################################ <?xml version="1.0" encoding="UTF-8"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <configuration> <!--节点黑名单列表文件,用于下线hadoop节点 --> <property>   <name>dfs.hosts.exclude</name>   <value>/usr/hadoop/hadoop/etc/hadoop/exclude</value> </property>

    <!--指定hdfs的block大小64M -->   <property>     <name>dfs.block.size</name>     <value>67108864</value>   </property>

    <!--指定hdfs的nameservice为gagcluster,需要和core-site.xml中的保持一致 -->   <property>     <name>dfs.nameservices</name>     <value>gagcluster</value>   </property>

    <!-- gagcluster下面有两个NameNode,分别是nn1,nn2 -->   <property>     <name>dfs.ha.namenodes.gagcluster</name>     <value>nn1,nn2</value>   </property>

    <!-- nn1的RPC通信地址 -->   <property>     <name>dfs.namenode.rpc-address.gagcluster.nn1</name>     <value>SA01:8020</value>   </property>

    <!-- nn1的http通信地址 -->   <property>     <name>dfs.namenode.http-address.gagcluster.nn1</name>     <value>SA01:50070</value>   </property>

    <!-- nn2的RPC通信地址 -->   <property>     <name>dfs.namenode.rpc-address.gagcluster.nn2</name>     <value>SA02:8020</value>   </property>

    <!-- nn2的http通信地址 -->   <property>     <name>dfs.namenode.http-address.gagcluster.nn2</name>     <value>SA02:50070</value>   </property>

    <!-- 指定NameNode的元数据在JournalNode上的存放位置 -->   <property>     <name>dfs.namenode.shared.edits.dir</name>     <value>qjournal://SA01:8485;SA02:8485;SA03:8485/gagcluster</value>   </property>

    <!-- 配置失败自动切换实现方式 -->   <property>     <name>dfs.client.failover.proxy.provider.gagcluster</name>     <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>   </property>

    <!-- 配置隔离机制 -->   <property>     <name>dfs.ha.fencing.methods</name>     <value>sshfence</value>   </property>

    <!-- 使用隔离机制时需要ssh免密码登陆 -->   <property>     <name>dfs.ha.fencing.ssh.private-key-files</name>     <value>/root/.ssh/id_rsa</value>   </property>

    <!-- 指定JournalNode在本地磁盘上存放edits数据的目录 -->   <property>     <name>dfs.journalnode.edits.dir</name>     <value>/usr/hadoop/storage/hadoop/journal</value>   </property>

    <!--指定支持高可用自动切换机制-->   <property>     <name>dfs.ha.automatic-failover.enabled</name>     <value>true</value>   </property>

    <!--指定namenode名称空间的存储地址-->   <property>     <name>dfs.namenode.name.dir</name>        <value>/usr/hadoop/storage/hadoop/name</value>   </property>

    <!--指定datanode数据存储地址-->   <property>     <name>dfs.datanode.data.dir</name>     <value>file:/usr/hadoop/storage/hadoop/data</value>   </property>

    <!--指定数据冗余份数-->   <property>     <name>dfs.replication</name>     <value>2</value>   </property>

    <!--指定可以通过web访问hdfs目录-->   <property>     <name>dfs.webhdfs.enabled</name>     <value>true</value>   </property>

    <!--保证数据恢复 -->   <property>     <name>dfs.journalnode.http-address</name>     <value>0.0.0.0:8480</value>   </property>

      <property>     <name>dfs.journalnode.rpc-address</name>     <value>0.0.0.0:8485</value>   </property>

      <property>     <name>ha.zookeeper.quorum</name>     <value>SA01:2181,SA02:2181,SA03:2181</value>   </property> </configuration> ################################################# [3] cp /usr/hadoop/hadoop/etc/hadoop/mapred-site.xml.template /usr/hadoop/hadoop/etc/hadoop/mapred-site.xml vi /usr/hadoop/hadoop/etc/hadoop/mapred-site.xml ################################################# <?xml version="1.0"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <configuration> <!-- 配置MapReduce运行于yarn中 -->   <property>     <name>mapreduce.framework.name</name>     <value>yarn</value>   </property>

    <!-- 配置 MapReduce JobHistory Server 地址 ,默认端口10020 -->   <property>     <name>mapreduce.jobhistory.address</name>     <value>0.0.0.0:10020</value>   </property>

    <!-- 配置 MapReduce JobHistory Server web ui 地址, 默认端口19888 -->   <property>     <name>mapreduce.jobhistory.webapp.address</name>     <value>0.0.0.0:19888</value>   </property> </configuration> ###################################################

    [4] vi /usr/hadoop/hadoop/etc/hadoop/yarn-site.xml ################################################### <?xml version="1.0"?> <configuration> <!--日志聚合功能-->    <property>      <name>yarn.log-aggregation-enable</name>      <value>true</value>   </property>

    <!--在HDFS上聚合的日志最长保留多少秒。3天-->    <property>      <name>yarn.log-aggregation.retain-seconds</name>      <value>259200</value>   </property>

    <!--rm失联后重新连接的时间间隔(毫秒)-->    <property>       <name>yarn.resourcemanager.connect.retry-interval.ms</name>       <value>2000</value>    </property>

    <!--开启resource manager HA,默认为false-->    <property>       <name>yarn.resourcemanager.ha.enabled</name>       <value>true</value>    </property> 

    <!--配置resource manager -->   <property>     <name>yarn.resourcemanager.ha.rm-ids</name>     <value>rm1,rm2</value>   </property>

      <property>     <name>ha.zookeeper.quorum</name>     <value>SA01:2181,SA02:2181,SA03:2181</value>    </property>    <!--开启故障自动切换-->    <property>       <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>       <value>true</value>    </property> 

      <property>     <name>yarn.resourcemanager.hostname.rm1</name>     <value>SA01</value>   </property>                         <property>      <name>yarn.resourcemanager.hostname.rm2</name>      <value>SA02</value>   </property>

    <!--在namenode1上配置rm1,在namenode2上配置rm2,注意:一般都喜欢把配置好的文件远程复制到其它机器上,但这个在YARN的另一个机器上一定要修改-->    <property>      <name>yarn.resourcemanager.ha.id</name>      <value>rm1</value>   </property> 

    <!--开启自动恢复功能-->    <property>     <name>yarn.resourcemanager.recovery.enabled</name>      <value>true</value>    </property>

    <!--配置与zookeeper的连接地址-->    <property>      <name>yarn.resourcemanager.zk-state-store.address</name>      <value>SA01:2181,SA02:2181,SA03:2181</value>   </property> 

      <property>      <name>yarn.resourcemanager.store.class</name>      <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>    </property> 

      <property>     <name>yarn.resourcemanager.zk-address</name>     <value>SA01:2181,SA02:2181,SA03:2181</value>   </property>

      <property>      <name>yarn.resourcemanager.cluster-id</name>      <value>gagcluster-yarn</value>    </property> 

    <!--scheduler失联后等待重新连接的时间(毫秒)-->    <property>      <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>      <value>5000</value>    </property> 

    <!--配置rm1-->    <property>      <name>yarn.resourcemanager.address.rm1</name>      <value>SA01:8132</value>    </property> 

      <property>      <name>yarn.resourcemanager.scheduler.address.rm1</name>      <value>SA01:8130</value>    </property> 

      <property>      <name>yarn.resourcemanager.webapp.address.rm1</name>      <value>SA01:8188</value>    </property> 

      <property>     <name>yarn.resourcemanager.resource-tracker.address.rm1</name>      <value>SA01:8131</value>    </property> 

      <property>      <name>yarn.resourcemanager.admin.address.rm1</name>      <value>SA01:8033</value>    </property> 

      <property>      <name>yarn.resourcemanager.ha.admin.address.rm1</name>      <value>SA01:23142</value>    </property> 

    <!--配置rm2-->    <property>      <name>yarn.resourcemanager.address.rm2</name>      <value>SA02:8132</value>    </property> 

      <property>      <name>yarn.resourcemanager.scheduler.address.rm2</name>      <value>SA02:8130</value>    </property> 

      <property>      <name>yarn.resourcemanager.webapp.address.rm2</name>      <value>SA02:8188</value>    </property> 

      <property>     <name>yarn.resourcemanager.resource-tracker.address.rm2</name>      <value>SA02:8131</value>    </property> 

      <property>      <name>yarn.resourcemanager.admin.address.rm2</name>      <value>SA02:8033</value>    </property> 

      <property>      <name>yarn.resourcemanager.ha.admin.address.rm2</name>      <value>SA02:23142</value>    </property> 

      <property>      <name>yarn.nodemanager.aux-services</name>      <value>mapreduce_shuffle</value>    </property> 

      <property>      <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>      <value>org.apache.hadoop.mapred.ShuffleHandler</value>    </property> 

      <property>      <name>yarn.nodemanager.local-dirs</name>      <value>/usr/hadoop/storage/yarn/local</value>    </property> 

      <property>      <name>yarn.nodemanager.log-dirs</name>      <value>/usr/hadoop/storage/yarn/logs</value>    </property> 

      <property>      <name>mapreduce.shuffle.port</name>      <value>23080</value>    </property> 

    <!--故障处理类-->    <property>      <name>yarn.client.failover-proxy-provider</name>      <value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value>    </property> 

      <property>       <name>yarn.resourcemanager.ha.automatic-failover.zk-base-path</name>       <value>/yarn-leader-election</value>   </property> </configuration>

    #######################################################

    配置DataNode节点 vi /usr/hadoop/hadoop/etc/hadoop/slaves #######################################################  slave141 slave142 slave143 Slave144 Slave145

    vi /usr/hadoop/hadoop/etc/hadoop/{hadoop,mapred,yarn}-env.sh(hadoop-env.sh、mapred-env.sh、yarn-env.sh三个env.sh文件) ####################################################### export JAVA_HOME=/usr/hadoop/jdk

    创建exclude文件,用于以后下线hadoop节点 touch /usr/hadoop/hadoop/etc/hadoop/exclude

    同步hadoop工程到slave140~slave145机器上面 for ip in `seq 140 145`;do scp -r /usr/hadoop/hadoop slave$ip:/usr/hadoop/;done

    修改nn2(SA02,即rm2所在节点)上的配置文件yarn-site.xml #####################################################   <property>     <name>yarn.resourcemanager.ha.id</name>      <value>rm2</value>   </property> #####################################################

    转载请注明原文地址: https://ju.6miu.com/read-11402.html

    最新回复(0)