Skip to content

1.hadoop 搭建

1.1 基础环境配置规划

192.168.229.131  4C 4G  NameNode SecondaryNameNode DataNode NodeManager ResourceManager
192.168.229.132  4C 2G  DataNode NodeManager
192.168.229.133  4C 2G  DataNode NodeManager

1.2前置操作

sh

## Create the hadoop user and set its password (run on all three machines)
useradd hadoop
passwd hadoop
## Map hostnames to IPs (run on all three machines)
vim /etc/hosts
192.168.229.131 node1
192.168.229.132  node2 
192.168.229.133   node3
## Set the hostname (run the matching line on each machine)
 hostnamectl set-hostname node1
 hostnamectl set-hostname node2
 hostnamectl set-hostname node3
## Passwordless SSH: every machine must reach every other machine AND itself
ssh-keygen -t rsa 
ssh-copy-id hadoop@192.168.229.131
ssh-copy-id hadoop@192.168.229.132
ssh-copy-id hadoop@192.168.229.133
ssh-copy-id localhost
## Configure the Java environment variables on all three machines
vim /etc/profile
export JAVA_HOME=/opt/jdk1.8
export PATH=$PATH:$JAVA_HOME/bin
## Reload the profile so the variables take effect
source  /etc/profile

1.3 hadoop安装配置

sh
## Upload hadoop-3.1.3.tar.gz
## Extract hadoop into /opt (tar needs capital -C for the target directory) and rename
tar -zxvf hadoop-3.1.3.tar.gz -C /opt/
mv /opt/hadoop-3.1.3 /opt/hadoop
## Give the hadoop user ownership of the installation
chown -R hadoop:hadoop /opt/hadoop
# Environment variables; sbin is required so start-dfs.sh / start-yarn.sh are on PATH
vim ~/.bashrc
export HADOOP_HOME=/opt/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

source ~/.bashrc

## Files to edit — the first two configure HDFS, the next two configure YARN
/opt/hadoop/etc/hadoop/core-site.xml
/opt/hadoop/etc/hadoop/hdfs-site.xml
/opt/hadoop/etc/hadoop/mapred-site.xml
/opt/hadoop/etc/hadoop/yarn-site.xml
## Worker list and per-daemon environment settings
/opt/hadoop/etc/hadoop/workers
/opt/hadoop/etc/hadoop/hadoop-env.sh

## After finishing all configuration, distribute the files to every node
scp /opt/hadoop/etc/hadoop/core-site.xml node2:/opt/hadoop/etc/hadoop/core-site.xml
scp /opt/hadoop/etc/hadoop/hdfs-site.xml node2:/opt/hadoop/etc/hadoop/hdfs-site.xml
scp /opt/hadoop/etc/hadoop/mapred-site.xml node2:/opt/hadoop/etc/hadoop/mapred-site.xml
scp /opt/hadoop/etc/hadoop/yarn-site.xml node2:/opt/hadoop/etc/hadoop/yarn-site.xml
scp /opt/hadoop/etc/hadoop/workers node2:/opt/hadoop/etc/hadoop/workers
scp /opt/hadoop/etc/hadoop/hadoop-env.sh node2:/opt/hadoop/etc/hadoop/hadoop-env.sh
----------------------------------------------------------------------------------
scp /opt/hadoop/etc/hadoop/core-site.xml node3:/opt/hadoop/etc/hadoop/core-site.xml
scp /opt/hadoop/etc/hadoop/hdfs-site.xml node3:/opt/hadoop/etc/hadoop/hdfs-site.xml
scp /opt/hadoop/etc/hadoop/mapred-site.xml node3:/opt/hadoop/etc/hadoop/mapred-site.xml
scp /opt/hadoop/etc/hadoop/yarn-site.xml node3:/opt/hadoop/etc/hadoop/yarn-site.xml
scp /opt/hadoop/etc/hadoop/workers node3:/opt/hadoop/etc/hadoop/workers
scp /opt/hadoop/etc/hadoop/hadoop-env.sh node3:/opt/hadoop/etc/hadoop/hadoop-env.sh
## Format HDFS — run only once, before the very first start
hdfs namenode -format
## Start/stop HDFS
start-dfs.sh
stop-dfs.sh
## Start/stop YARN
start-yarn.sh
stop-yarn.sh
## Start/stop the JobHistory server
mapred --daemon start historyserver
mapred --daemon stop historyserver
## Web UI ports
# HDFS (NameNode) web UI
http://node1:9870
# YARN (ResourceManager) web UI
http://node1:8088
# Job history web UI (runs on node2)
http://node2:19888

1.4配置文件模板

core-site.xml

xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

<!-- Default filesystem URI: the NameNode RPC endpoint on node1 -->
 <property>
            <name>fs.defaultFS</name>
            <value>hdfs://node1:8020</value>
    </property>
    <!-- Base directory for Hadoop runtime/temporary files -->
    <property>
            <name>hadoop.tmp.dir</name>
            <value>/opt/hadoop/tmp</value>
    </property>
    <!-- Read/write buffer size in bytes. NOTE(review): the original comment claimed the default is 2048, but Hadoop's core-default.xml documents 4096 — confirm the intent of this override -->
    <property>
            <name>io.file.buffer.size</name>
            <value>2048</value>
    </property>
</configuration>

hdfs-site.xml

xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Dynamic node decommission/recommission (disabled; note the paths reference a different install, hadoop-3.2.2)
<property>
    <name>dfs.hosts</name>
    <value>/sjj/install/hadoop-3.2.2/etc/hadoop/accept_host</value>
</property>
<property>
    <name>dfs.hosts.exclude</name>
    <value>/sjj/install/hadoop-3.2.2/etc/hadoop/deny_host</value>
</property>
-->
<!-- SecondaryNameNode HTTP endpoint (node1 per the hardware plan) -->
<property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>node1:9868</value>
</property>
<!-- NameNode web UI address -->
<property>
    <name>dfs.namenode.http-address</name>
    <value>node1:9870</value>
</property>

<!-- DataNode storage directories; in production, decide the disk mount points first and separate multiple directories with commas -->
<property>
    <name>dfs.datanode.data.dir</name>
    <value>/opt/hadoop/data</value>
</property>


<!-- Number of block replicas; 1 suits this small test cluster (HDFS default is 3) -->
<property>
    <name>dfs.replication</name>
    <value>1</value>
</property>
<!-- Disable HDFS permission checking — convenient for testing, not for production -->
<property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
</property>
<!-- HDFS block size: 134217728 bytes = 128 MB -->
<property>
    <name>dfs.blocksize</name>
    <value>134217728</value>
</property>
</configuration>

mapred-site.xml

xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<!-- Run MapReduce jobs on YARN (the default framework is "local") -->
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<!-- Allow small jobs to run entirely inside the ApplicationMaster JVM ("uber" mode) -->
<property>
    <name>mapreduce.job.ubertask.enable</name>
    <value>true</value>
</property>
<!-- JobHistory server IPC address (runs on node2) -->
<property>
    <name>mapreduce.jobhistory.address</name>
    <value>node2:10020</value>
</property>
<!-- JobHistory server web UI address -->
<property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>node2:19888</value>
</property>
<!-- Tell the AM, map and reduce processes where the MapReduce framework lives -->
<property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
</configuration>

yarn-site.xml

xml
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>

<!-- Site specific YARN configuration properties -->
<!-- Host running the ResourceManager (node1 per the hardware plan) -->
<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>node1</value>
</property>
<!-- Auxiliary service that provides the MapReduce shuffle on each NodeManager -->
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
</configuration>

workers

text
node1
node2
node3

hadoop-env.sh

sh
## Per-daemon environment; HADOOP_HOME could also be set as a global environment variable instead
export JAVA_HOME=/opt/jdk1.8
export HADOOP_HOME=/opt/hadoop
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop

1.5 hive

sh
## Upload apache-hive-3.1.3-bin.tar.gz
## Extract on node1 (tar needs -C before the target directory) and rename
tar -zxvf apache-hive-3.1.3-bin.tar.gz -C /opt
mv /opt/apache-hive-3.1.3-bin /opt/hive
## Swap Hive's bundled guava for Hadoop's newer one to avoid a version conflict at startup
rm -rf /opt/hive/lib/guava-19.0.jar
cp /opt/hadoop/share/hadoop/common/lib/guava-27.0-jre.jar /opt/hive/lib/
## Upload the MySQL JDBC driver into /opt/hive/lib/
/opt/hive/lib/mysql5.jar
## Add Hive to the environment via ~/.bashrc
vim ~/.bashrc
export HIVE_HOME=/opt/hive
export PATH=$PATH:$HIVE_HOME/bin

source ~/.bashrc

vim /opt/hive/conf/hive-env.sh
## Configure the MySQL connection information
vim /opt/hive/conf/hive-site.xml
## Initialize the metastore schema (first run only); the flag is -verbose
schematool -initSchema -dbType mysql -verbose
## Start the metastore and HiveServer2 in the background
nohup hive --service metastore &
nohup hive --service hiveserver2 &
## Hive CLI client
hive 

## Create a database and a test table
create database test;
show databases;
use test;
show tables;

create table t_contract(id int ,contract_code string,amount decimal(22,4),create_date date);
insert  into t_contract(id,contract_code ,amount ,create_date ) values(1,'zs-001',2000.0,'2005-07-10');
insert  into t_contract(id,contract_code ,amount ,create_date ) values(2,'zs-002',2000.0,'2004-07-10');
insert  into t_contract(id,contract_code ,amount ,create_date ) values(3,'zs-003',10000.0,'2014-07-10');
select datediff("2001-10-1","1998-10-02");
select date_add("2001-10-1",2);

hive-env.sh

sh
## Sourced by bin/hive; points Hive at the Hadoop installation
HADOOP_HOME=/opt/hadoop
## Directory containing hive-site.xml
export HIVE_CONF_DIR=/opt/hive/conf
## Extra jars (e.g. the MySQL JDBC driver) loaded by Hive
export HIVE_AUX_JARS_PATH=/opt/hive/lib

hive-site.xml

xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->
<configuration>
<!-- HDFS directory where managed-table data is stored -->
<property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/opt/hive/warehouse</value>
    <description>数仓位置</description>
  </property>
  <!-- JDBC URL of the MySQL database backing the metastore -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://192.168.229.1:3306/hive?useSSL=false</value>
    <description>MySQL连接协议 </description>
  </property>
  <!-- JDBC driver class; com.mysql.jdbc.Driver matches the MySQL 5.x driver jar uploaded earlier -->
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>JDBC连接驱动</description>
  </property>
  <!-- Metastore DB credentials. NOTE(review): plaintext root password in a config file — restrict file permissions or use a credential provider -->
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
    <description>用户名</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>root</value>
    <description>密码</description> 
  </property>  
  <!-- Host HiveServer2 binds its Thrift service to -->
  <property>
    <name>hive.server2.thrift.bind.host</name>
    <value>node1</value>
    <description>H2S绑定host</description>
  </property>
  <!-- Remote metastore URI used by clients (remote metastore mode) -->
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://node1:9083</value>
    <description>远程模式部署metastore地址</description>
  </property>
  <!-- Disable authorization on the metastore event DB notification API -->
  <property>
    <name>hive.metastore.event.db.notification.api.auth</name>
    <value>false</value>
    <description>关闭元数据存储权限</description>
  </property>
  <!-- NOTE(review): hive.metastore.local was deprecated/removed in modern Hive; setting hive.metastore.uris alone selects remote mode — confirm this property is still needed -->
  <property>
    <name>hive.metastore.local</name>  
    <value>false</value>  
  </property>
</configuration>