Introduction: The Hadoop Distributed File System (HDFS) is a distributed file system designed to run on commodity hardware. It has much in common with existing distributed file systems, but the differences are also significant: HDFS is highly fault-tolerant, is designed to be deployed on low-cost machines, and provides high-throughput data access, which makes it well suited to applications with large data sets. HDFS relaxes a few POSIX requirements to enable streaming access to file system data. HDFS was originally built as infrastructure for the Apache Nutch web search engine project and is part of the Apache Hadoop Core project.
1. Configure the network
[root@db1 local]# cat /etc/sysconfig/network
# Created by anaconda
NETWORKING=yes
HOSTNAME=db1
[root@db2 local]# cat /etc/sysconfig/network
# Created by anaconda
NETWORKING=yes
HOSTNAME=db2
[root@db3 local]# cat /etc/sysconfig/network
# Created by anaconda
NETWORKING=yes
HOSTNAME=db3
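The HOSTNAME line in /etc/sysconfig/network only takes effect at boot; on a systemd-based system the hostname can also be applied immediately, for example:
[root@db1 local]# hostnamectl set-hostname db1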
2. Configure /etc/hosts
[root@db1 local]# cat /etc/hosts
10.0.0.51 db1
10.0.0.52 db2
10.0.0.53 db3
[root@db2 local]# cat /etc/hosts
10.0.0.51 db1
10.0.0.52 db2
10.0.0.53 db3
[root@db3 local]# cat /etc/hosts
10.0.0.51 db1
10.0.0.52 db2
10.0.0.53 db3
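With /etc/hosts in place on all three machines, name resolution between the nodes can be verified quickly:
[root@db1 local]# ping -c 1 db2
[root@db1 local]# ping -c 1 db3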
3. Distribute SSH keys for passwordless login
[root@db1 local]# ssh-keygen -t rsa
[root@db1 local]# ssh-copy-id root@10.0.0.52
[root@db1 local]# ssh-copy-id root@10.0.0.53
[root@db1 local]# ssh db2
Last login: Tue Aug 4 10:10:45 2020 from 10.0.0.52
[root@db2 ~]# exit
logout
Connection to db2 closed.
[root@db1 local]# ssh db3
Last login: Tue Aug 4 10:11:00 2020 from 10.0.0.52
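Note: start-all.sh also logs in to the local node over SSH, so without a key authorized on db1 itself you will be prompted for root@db1's password (as happens in step 11 below). Copying the key to db1 as well avoids this:
[root@db1 local]# ssh-copy-id root@10.0.0.51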
4. Install the JDK
Download page: https://www.oracle.com/java/technologies/javase/javase-jdk8-downloads.html
Product / File Description: Linux x64 Compressed Archive
Download: jdk-8u261-linux-x64.tar.gz
An Oracle account is required for the download.
# copy the archive to the other nodes
[root@db1 local]# scp jdk-8u261-linux-x64.tar.gz db2:`pwd`
[root@db1 local]# scp jdk-8u261-linux-x64.tar.gz db3:`pwd`
# extract
[root@db1 local]# tar -zxvf jdk-8u261-linux-x64.tar.gz
[root@db2 local]# tar -zxvf jdk-8u261-linux-x64.tar.gz
[root@db3 local]# tar -zxvf jdk-8u261-linux-x64.tar.gz
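The archive extracts to jdk1.8.0_261; a quick check that the path matches the JAVA_HOME set in the next step:
[root@db1 local]# ls -d /usr/local/jdk1.8.0_261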
5. Add environment variables
[root@db1 local]# tail -n 5 /etc/profile
export PATH=$PATH:/opt/mongodb/bin
export JAVA_HOME=/usr/local/jdk1.8.0_261
export JRE_HOME=$JAVA_HOME/jre
export PATH=$JAVA_HOME/bin:$PATH
export CLASSPATH=$JAVA_HOME/lib:$JRE_HOME/lib
[root@db2 local]# tail -n 5 /etc/profile
export PATH=${M2_HOME}/bin:$PATH
export JAVA_HOME=/usr/local/jdk1.8.0_261
export JRE_HOME=$JAVA_HOME/jre
export PATH=$JAVA_HOME/bin:$PATH
export CLASSPATH=$JAVA_HOME/lib:$JRE_HOME/lib
[root@db3 local]# tail -n 5 /etc/profile
export PATH=${M2_HOME}/bin:$PATH
export JAVA_HOME=/usr/local/jdk1.8.0_261
export JRE_HOME=$JAVA_HOME/jre
export PATH=$JAVA_HOME/bin:$PATH
export CLASSPATH=$JAVA_HOME/lib:$JRE_HOME/lib
[root@db1 local]# source /etc/profile
[root@db2 local]# source /etc/profile
[root@db3 local]# source /etc/profile
6. Check the JDK installation
[root@db1 local]# java -version
java version "1.8.0_261"
Java(TM) SE Runtime Environment (build 1.8.0_261-b12)
Java HotSpot(TM) 64-Bit Server VM (build 25.261-b12, mixed mode)
[root@db2 local]# java -version
java version "1.8.0_261"
Java(TM) SE Runtime Environment (build 1.8.0_261-b12)
Java HotSpot(TM) 64-Bit Server VM (build 25.261-b12, mixed mode)
[root@db3 local]# java -version
java version "1.8.0_261"
Java(TM) SE Runtime Environment (build 1.8.0_261-b12)
Java HotSpot(TM) 64-Bit Server VM (build 25.261-b12, mixed mode)
7. Install Hadoop
# Hadoop download page (closer.cgi returns a mirror-selection page, not the tarball itself)
https://www.apache.org/dyn/closer.cgi/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz
# download the tarball directly from the Apache archive with wget
[root@db1 local]# wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz
[root@db1 local]# scp hadoop-2.7.7.tar.gz db2:`pwd`
hadoop-2.7.7.tar.gz 100% 209MB 18.5MB/s 00:11
[root@db1 local]# scp hadoop-2.7.7.tar.gz db3:`pwd`
hadoop-2.7.7.tar.gz 100% 209MB 16.5MB/s 00:12
# extract
[root@db1 local]# tar -zxvf hadoop-2.7.7.tar.gz
[root@db2 local]# tar -zxvf hadoop-2.7.7.tar.gz
[root@db3 local]# tar -zxvf hadoop-2.7.7.tar.gz
8. Add Hadoop environment variables
[root@db1 local]# vim /etc/profile
export HADOOP_HOME=/usr/local/hadoop-2.7.7
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
[root@db1 local]# source /etc/profile
[root@db2 local]# vim /etc/profile
export HADOOP_HOME=/usr/local/hadoop-2.7.7
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
[root@db2 local]# source /etc/profile
[root@db3 local]# vim /etc/profile
export HADOOP_HOME=/usr/local/hadoop-2.7.7
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
[root@db3 local]# source /etc/profile
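After /etc/profile has been sourced on each node, the hadoop binary should resolve and report its version:
[root@db1 local]# hadoop version
Hadoop 2.7.7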
9. Configure Hadoop
# change into the configuration directory
[root@db1 hadoop]# pwd
/usr/local/hadoop-2.7.7/etc/hadoop
# the files to configure are:
hadoop-env.sh
# The java implementation to use.
export JAVA_HOME=/usr/local/jdk1.8.0_261   # add this line
yarn-env.sh
# some Java parameters
export JAVA_HOME=/usr/local/jdk1.8.0_261   # add this line
core-site.xml
# add the following configuration
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://db1:9000</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:/usr/temp</value>
  </property>
  <property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
  </property>
</configuration>
# the directory in the hadoop.tmp.dir file: URI must be created; see the loop after hdfs-site.xml below, which creates it on every node
mkdir /usr/temp
hdfs-site.xml
# add the following configuration
<configuration>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>db1:9001</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/usr/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/usr/dfs/data</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.web.ugi</name>
    <value>supergroup</value>
  </property>
</configuration>
# the directories in the dfs.namenode.name.dir and dfs.datanode.data.dir file: URIs must be created
mkdir -p /usr/dfs/name /usr/dfs/data
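These local directories are needed on every node, not just db1. A minimal sketch that creates all of them from db1, relying on the passwordless SSH set up in step 3:
[root@db1 local]# for h in db1 db2 db3; do ssh $h "mkdir -p /usr/temp /usr/dfs/name /usr/dfs/data"; done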
mapred-site.xml
# rename the shipped template
mv mapred-site.xml.template mapred-site.xml
# add the following configuration
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>db1:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>db1:19888</value>
  </property>
</configuration>
yarn-site.xml
# add the following configuration
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>db1:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>db1:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>db1:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>db1:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>db1:8088</value>
  </property>
</configuration>
slaves
# list the hostnames of the worker (DataNode/NodeManager) nodes, one per line
[root@db1 hadoop]# cat slaves
db1
db2
db3
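The transcript only shows the configuration files being edited on db1, but the same files must be present on db2 and db3. A minimal way to sync them, assuming the directory layout used above:
[root@db1 hadoop]# scp -r /usr/local/hadoop-2.7.7/etc/hadoop/* db2:/usr/local/hadoop-2.7.7/etc/hadoop/
[root@db1 hadoop]# scp -r /usr/local/hadoop-2.7.7/etc/hadoop/* db3:/usr/local/hadoop-2.7.7/etc/hadoop/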
10. Format the NameNode on the master node
[root@db1 hadoop-2.7.7]# pwd
/usr/local/hadoop-2.7.7
[root@db1 hadoop-2.7.7]# ./bin/hadoop namenode -format
20/08/04 15:01:57 INFO common.Storage: Storage directory /usr/dfs/name has been successfully formatted. # format succeeded
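Note: in Hadoop 2.x the hadoop namenode entry point is deprecated (the command itself prints a DEPRECATED warning); the current equivalent is:
[root@db1 hadoop-2.7.7]# ./bin/hdfs namenode -format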
11. Start Hadoop
[root@db1 hadoop-2.7.7]# ./sbin/start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
Starting namenodes on [db1]
root@db1's password:
db1: namenode running as process 24812. Stop it first.
root@db1's password:
db1: starting datanode, logging to /usr/local/hadoop-2.7.7/logs/hadoop-root-datanode-db1.out
Starting secondary namenodes [db1]
root@db1's password:
db1: secondarynamenode running as process 24989. Stop it first.
starting yarn daemons
resourcemanager running as process 25181. Stop it first.
root@db1's password:
db1: starting nodemanager, logging to /usr/local/hadoop-2.7.7/logs/yarn-root-nodemanager-db1.out
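As the first line of the output says, start-all.sh is deprecated; the equivalent is to start HDFS and YARN separately:
[root@db1 hadoop-2.7.7]# ./sbin/start-dfs.sh
[root@db1 hadoop-2.7.7]# ./sbin/start-yarn.sh
The "running as process ... Stop it first" messages mean those daemons were already running from an earlier attempt; they are warnings, not startup failures.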
12. Start the ResourceManager on db2:
(Note: yarn-site.xml points every yarn.resourcemanager.* address at db1, and the jps output in step 14 shows the active ResourceManager running on db1, so with this configuration the daemon started on db2 does not serve requests; in a single-ResourceManager setup this step can be skipped.)
[root@db2 hadoop-2.7.7]# sbin/yarn-daemon.sh start resourcemanager
starting resourcemanager, logging to /usr/local/hadoop-2.7.7/logs/yarn-root-resourcemanager-db2.out
13. Run the MapReduce JobHistory server on db3
(Note: mapred-site.xml points mapreduce.jobhistory.address at db1:10020, so to match the configuration the history server should run on db1 rather than db3.)
[root@db3 hadoop-2.7.7]# sbin/mr-jobhistory-daemon.sh start historyserver
starting historyserver, logging to /usr/local/hadoop-2.7.7/logs/mapred-root-historyserver-db3.out
[root@db3 hadoop-2.7.7]# jps
14. Check the Java processes with jps
[root@db1 hadoop-2.7.7]# jps
29480 Jps
24812 NameNode
24989 SecondaryNameNode
25181 ResourceManager
25871 DataNode
26191 NodeManager
[root@db2 hadoop-2.7.7]# jps
26099 NodeManager
25782 DataNode
28810 Jps
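Finally, the web UIs give a quick end-to-end check; 8088 comes from yarn.resourcemanager.webapp.address above, and 50070 is the default NameNode web UI port in Hadoop 2.x:
# NameNode web UI
http://db1:50070
# YARN ResourceManager web UI
http://db1:8088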