1. Hadoop Setup
1.1 Basic Environment Planning
| Host | Spec | Roles |
|---|---|---|
| 192.168.229.131 | 4C 4G | NameNode SecondaryNameNode DataNode NodeManager ResourceManager |
| 192.168.229.132 | 4C 2G | DataNode NodeManager |
| 192.168.229.133 | 4C 2G | DataNode NodeManager |
1.2 Prerequisites
sh
## Create the hadoop user and set its password (on all three machines)
useradd hadoop
passwd hadoop
## Configure /etc/hosts
vim /etc/hosts
192.168.229.131 node1
192.168.229.132 node2
192.168.229.133 node3
## Set the hostname (run the matching command on each machine)
hostnamectl set-hostname node1
hostnamectl set-hostname node2
hostnamectl set-hostname node3
## Set up passwordless SSH (every machine to every other machine, on all three)
ssh-keygen -t rsa
ssh-copy-id hadoop@192.168.229.131
ssh-copy-id hadoop@192.168.229.132
ssh-copy-id hadoop@192.168.229.133
ssh-copy-id localhost
## Configure the Java environment variables (on all three machines)
vim /etc/profile
export JAVA_HOME=/opt/jdk1.8
export PATH=$PATH:$JAVA_HOME/bin
## Apply the changes
source /etc/profile
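## Optional sanity check (a sketch, assuming the node1/node2/node3 names above):
## each ssh call should print the remote hostname without asking for a password,
## and java -version should show the JDK configured in /etc/profile
for host in node1 node2 node3; do ssh hadoop@$host hostname; done
java -version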
1.3 Hadoop Installation and Configuration
sh
## Upload hadoop-3.1.3.tar.gz
## Extract Hadoop and rename the directory
tar -zxvf hadoop-3.1.3.tar.gz -C /opt/
mv /opt/hadoop-3.1.3 /opt/hadoop
## Change the owner of the files
chown -R hadoop:hadoop /opt/hadoop
## Configure environment variables (sbin is needed so start-dfs.sh / start-yarn.sh are on PATH)
vim ~/.bashrc
export HADOOP_HOME=/opt/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
source ~/.bashrc
## Edit the following files: the first two configure HDFS, the last two configure MapReduce and YARN
/opt/hadoop/etc/hadoop/core-site.xml
/opt/hadoop/etc/hadoop/hdfs-site.xml
/opt/hadoop/etc/hadoop/mapred-site.xml
/opt/hadoop/etc/hadoop/yarn-site.xml
## Also edit the worker list and the environment script
/opt/hadoop/etc/hadoop/workers
/opt/hadoop/etc/hadoop/hadoop-env.sh
## After all configuration is done, distribute the files to every node
scp /opt/hadoop/etc/hadoop/core-site.xml node2:/opt/hadoop/etc/hadoop/core-site.xml
scp /opt/hadoop/etc/hadoop/hdfs-site.xml node2:/opt/hadoop/etc/hadoop/hdfs-site.xml
scp /opt/hadoop/etc/hadoop/mapred-site.xml node2:/opt/hadoop/etc/hadoop/mapred-site.xml
scp /opt/hadoop/etc/hadoop/yarn-site.xml node2:/opt/hadoop/etc/hadoop/yarn-site.xml
scp /opt/hadoop/etc/hadoop/workers node2:/opt/hadoop/etc/hadoop/workers
scp /opt/hadoop/etc/hadoop/hadoop-env.sh node2:/opt/hadoop/etc/hadoop/hadoop-env.sh
----------------------------------------------------------------------------------
scp /opt/hadoop/etc/hadoop/core-site.xml node3:/opt/hadoop/etc/hadoop/core-site.xml
scp /opt/hadoop/etc/hadoop/hdfs-site.xml node3:/opt/hadoop/etc/hadoop/hdfs-site.xml
scp /opt/hadoop/etc/hadoop/mapred-site.xml node3:/opt/hadoop/etc/hadoop/mapred-site.xml
scp /opt/hadoop/etc/hadoop/yarn-site.xml node3:/opt/hadoop/etc/hadoop/yarn-site.xml
scp /opt/hadoop/etc/hadoop/workers node3:/opt/hadoop/etc/hadoop/workers
scp /opt/hadoop/etc/hadoop/hadoop-env.sh node3:/opt/hadoop/etc/hadoop/hadoop-env.sh
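## The scp commands above can also be written as one loop (an equivalent sketch, same paths):
for host in node2 node3; do
  for f in core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml workers hadoop-env.sh; do
    scp /opt/hadoop/etc/hadoop/$f $host:/opt/hadoop/etc/hadoop/$f
  done
done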
## Format HDFS (only needed before the first start)
hdfs namenode -format
## Start/stop HDFS
start-dfs.sh
stop-dfs.sh
## Start/stop YARN
start-yarn.sh
stop-yarn.sh
## Start/stop the JobHistory server
mapred --daemon start historyserver
mapred --daemon stop historyserver
## Web UIs
# HDFS (NameNode) web UI
http://node1:9870
# YARN (ResourceManager) web UI
http://node1:8088
# JobHistory web UI
http://node2:19888
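## Optional check that the running daemons match the plan in 1.1 (a sketch):
## node1 should show NameNode, SecondaryNameNode, DataNode, ResourceManager, NodeManager;
## node2/node3 should show DataNode and NodeManager
jps
## should report three live DataNodes
hdfs dfsadmin -report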
1.4 Configuration File Templates
core-site.xml
xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://node1:8020</value>
</property>
<!-- Base directory for Hadoop runtime files -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop/tmp</value>
</property>
<!-- I/O buffer size in bytes (Hadoop's default is 4096); tune as needed for the workload -->
<property>
<name>io.file.buffer.size</name>
<value>2048</value>
</property>
</configuration>
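To confirm this file is actually being picked up, the effective value can be read back with hdfs getconf (a quick check, assuming HADOOP_HOME is set as above):
sh
## Should print hdfs://node1:8020 when core-site.xml is in effect
hdfs getconf -confKey fs.defaultFS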
hdfs-site.xml
xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Dynamic node commissioning/decommissioning (optional)
<property>
<name>dfs.hosts</name>
<value>/sjj/install/hadoop-3.2.2/etc/hadoop/accept_host</value>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value>/sjj/install/hadoop-3.2.2/etc/hadoop/deny_host</value>
</property>
-->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>node1:9868</value>
</property>
<!-- NameNode web UI address -->
<property>
<name>dfs.namenode.http-address</name>
<value>node1:9870</value>
</property>
<!-- DataNode data directories; in practice, decide the disk mount points first, then separate multiple directories with commas -->
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/hadoop/data</value>
</property>
<!-- Replication factor -->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
</configuration>
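A simple way to see the replication factor and block size actually in effect is to write a small file and inspect it with fsck; a sketch, assuming HDFS is already running:
sh
## Upload a small test file and show its blocks and replication
echo test > /tmp/replica-check.txt
hdfs dfs -mkdir -p /tmp
hdfs dfs -put /tmp/replica-check.txt /tmp/
hdfs fsck /tmp/replica-check.txt -files -blocks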
mapred-site.xml
xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.job.ubertask.enable</name>
<value>true</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>node2:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>node2:19888</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
</configuration>
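With this configuration, the bundled example job is a quick way to exercise YARN and the JobHistory server; a sketch, assuming the examples jar shipped with Hadoop 3.1.3 at its default location:
sh
## Submit the bundled pi example; after it finishes, the job should appear at http://node2:19888
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.1.3.jar pi 2 10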
yarn-site.xml
xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>node1</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
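Once YARN is started, the NodeManagers registered with the ResourceManager can be listed to confirm these settings took effect:
sh
## Should list node1, node2 and node3 as RUNNING NodeManagers
yarn node -list -all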
workers
xml
node1
node2
node3
hadoop-env.sh
sh
## HADOOP_HOME can also be configured as a global environment variable instead
export JAVA_HOME=/opt/jdk1.8
export HADOOP_HOME=/opt/hadoop
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
1.5 Hive
sh
## Upload apache-hive-3.1.3-bin.tar.gz
## Extract (on node1 only) and rename
tar -zxvf apache-hive-3.1.3-bin.tar.gz -C /opt
mv /opt/apache-hive-3.1.3-bin /opt/hive
## Replace the bundled guava with the newer one shipped with Hadoop to avoid a version conflict
rm -rf /opt/hive/lib/guava-19.0.jar
cp /opt/hadoop/share/hadoop/common/lib/guava-27.0-jre.jar /opt/hive/lib/
## Upload the MySQL JDBC driver to /opt/hive/lib/
/opt/hive/lib/mysql5.jar
## Add Hive to ~/.bashrc
vim ~/.bashrc
export HIVE_HOME=/opt/hive
export PATH=$PATH:$HIVE_HOME/bin
source ~/.bashrc
## Edit hive-env.sh (template below)
vim /opt/hive/conf/hive-env.sh
## Configure the MySQL connection details (template below)
vim /opt/hive/conf/hive-site.xml
## Initialize the metastore schema
schematool -initSchema -dbType mysql -verbose
## Start the metastore and HiveServer2 in the background
nohup hive --service metastore &
nohup hive --service hiveserver2 &
## Hive CLI client
hive
## Create a test database and table
create database test;
show databases;
use test;
show tables;
create table t_contract(id int ,contract_code string,amount decimal(22,4),create_date date);
insert into t_contract(id,contract_code ,amount ,create_date ) values(1,'zs-001',2000.0,'2005-07-10');
insert into t_contract(id,contract_code ,amount ,create_date ) values(2,'zs-002',2000.0,'2004-07-10');
insert into t_contract(id,contract_code ,amount ,create_date ) values(3,'zs-003',10000.0,'2014-07-10');
select datediff("2001-10-1","1998-10-02");
select date_add("2001-10-1",2);
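## Besides the hive CLI, the HiveServer2 started above can also be reached with Beeline
## (a sketch, assuming the default HiveServer2 thrift port 10000 and the hadoop user)
beeline -u jdbc:hive2://node1:10000 -n hadoop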
hive-env.sh
sh
HADOOP_HOME=/opt/hadoop
export HIVE_CONF_DIR=/opt/hive/conf
export HIVE_AUX_JARS_PATH=/opt/hive/lib
hive-site.xml
xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/opt/hive/warehouse</value>
<description>Hive warehouse directory (on HDFS)</description>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://192.168.229.1:3306/hive?useSSL=false</value>
<description>MySQL JDBC connection URL</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>JDBC driver class</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>MySQL username</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>root</value>
<description>MySQL password</description>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>node1</value>
<description>HiveServer2 bind host</description>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://node1:9083</value>
<description>Metastore URI for remote-mode deployment</description>
</property>
<property>
<name>hive.metastore.event.db.notification.api.auth</name>
<value>false</value>
<description>Disable authorization for the metastore event DB notification API</description>
</property>
<property>
<name>hive.metastore.local</name>
<value>false</value>
</property>
</configuration>
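Note: schematool -initSchema only creates the metastore tables; the hive database named in the JDBC URL above must already exist on the MySQL server. A minimal sketch, assuming root access to the MySQL instance at 192.168.229.1:
sh
## Create the empty metastore database referenced by javax.jdo.option.ConnectionURL
mysql -h 192.168.229.1 -uroot -p -e "CREATE DATABASE IF NOT EXISTS hive DEFAULT CHARACTER SET utf8;"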