1. 建立Hadoop账户
# Step 1: create a dedicated 'hadoop' system account.
sudo adduser hadoop
# Set (or reset) the account's password.
sudo passwd hadoop
2. 安装SSH, 配置SSH无密码登录
用hadoop账户登录
# Step 2 (run while logged in as the 'hadoop' user): install the SSH
# server and configure passwordless (key-based) login to localhost,
# which Hadoop's start scripts require.
sudo apt-get install openssh-server
# First connection records the host key and creates ~/.ssh; then exit.
ssh localhost
exit
cd ~/.ssh/
# Generate an RSA key pair (accept the defaults; leave passphrase empty
# for passwordless login).
ssh-keygen -t rsa
# Authorize our own public key for login to this machine.
cat ./id_rsa.pub >> ./authorized_keys
# sshd's default StrictModes rejects keys when authorized_keys is
# group- or world-writable — tighten the permissions explicitly.
chmod 600 ./authorized_keys
3. 安装Hadoop至/usr/local/中
# Step 3: install Hadoop under /usr/local.
cd /usr/local
# /usr/local is owned by root, so downloading, extracting, and renaming
# there require sudo (the original commands would fail with EACCES).
sudo wget http://apache.claz.org/hadoop/common/hadoop-2.7.2/hadoop-2.7.2.tar.gz
sudo tar xzf hadoop-2.7.2.tar.gz
sudo mv ./hadoop-2.7.2/ ./hadoop
# Hand ownership of the installation to the hadoop user AND its group,
# so daemons started as 'hadoop' can write logs/pids under this tree.
sudo chown -R hadoop:hadoop ./hadoop
cd /usr/local/hadoop
# Sanity check: print the installed Hadoop version.
./bin/hadoop version
4. 执行例子
# Step 4: run the bundled MapReduce 'grep' example as a smoke test.
cd /usr/local/hadoop
# -p makes this re-runnable (plain mkdir errors if ./input exists).
mkdir -p ./input
# Use the stock configuration files as sample input data.
cp ./etc/hadoop/*.xml ./input
# Count occurrences of words matching the regex 'dfs[a-z]+'.
./bin/hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar grep /usr/local/hadoop/input/ /usr/local/hadoop/output/ 'dfs[a-z]+'
# Show the job's results.
cat ./output/*
# Hadoop will not overwrite an existing results directory, so running
# the example again fails unless ./output is removed first.
rm -r ./output
5. 更改环境变量
# Step 5: put the Hadoop binaries on every user's PATH by editing the
# system-wide bashrc. 'sudo vim' is sufficient here — the original
# 'sudo su' opened a root shell only to run another sudo inside it,
# which is redundant and leaves an unnecessary root shell open.
cd /etc/
sudo vim bash.bashrc
#----add----
if [ -d "$HOME/bin" ] ; then
  PATH="$HOME/bin:$PATH"
fi
PATH="$PATH:/usr/local/google-cloud-sdk/bin:/usr/lib/postgresql/9.5/bin:/usr/local/hadoop/bin"
#----finish add----