Hive Cluster Installation and Deployment
Upload the installation package and extract it
cd /ddhome/tools
tar -zxvf apache-hive-3.1.2-bin.tar.gz -C /ddhome/bin/
cd /ddhome/bin/
mv apache-hive-3.1.2-bin hive
- Note: if Hive is to use the Spark execution engine, Hive must be recompiled; the package used here has already been recompiled.
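Later steps run schematool and hive directly, which assumes Hive's bin directory is on the PATH. A minimal environment sketch, assuming /etc/profile is used for cluster-wide variables (adapt to your own setup):
# Append to /etc/profile (assumed location)
export HIVE_HOME=/ddhome/bin/hive
export PATH=$PATH:$HIVE_HOME/bin
# Reload so the current shell picks up the change
source /etc/profile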
Edit the configuration file
cd /ddhome/bin/hive/conf
touch hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://ddavc:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;useUnicode=true&amp;characterEncoding=UTF-8</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>HzBaiyu0571!</value>
<description>password to use against metastore database</description>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
</property>
<property>
<name>datanucleus.autoCreateTables</name>
<value>true</value>
</property>
<property>
<name>datanucleus.autoCreateColumns</name>
<value>true</value>
</property>
<!-- Location of the Hive warehouse on HDFS -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
<description>location of default database for the warehouse</description>
</property>
<!-- Temporary directory for added resources -->
<property>
<name>hive.downloaded.resources.dir</name>
<value>/ddhome/bin/hive/tmp/resources</value>
<description>Temporary local directory for added resources in the remote file system.</description>
</property>
<!-- Before Hive 0.9, hive.exec.dynamic.partition had to be set to true explicitly; since 0.9 it defaults to true -->
<property>
<name>hive.exec.dynamic.partition</name>
<value>true</value>
</property>
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>nonstrict</value>
</property>
<!-- Change the log locations -->
<property>
<name>hive.exec.local.scratchdir</name>
<value>/ddhome/bin/hive/tmp/logs</value>
<description>Local scratch space for Hive jobs</description>
</property>
<property>
<name>hive.querylog.location</name>
<value>/ddhome/bin/hive/tmp/logs</value>
<description>Location of Hive run time structured log file</description>
</property>
<property>
<name>hive.server2.logging.operation.log.location</name>
<value>/ddhome/bin/hive/tmp/logs</value>
<description>Top level directory where operation logs are stored if logging functionality is enabled</description>
</property>
<!-- HiveServer2 no longer needs the hive.metastore.local setting: if hive.metastore.uris is empty the metastore is local, otherwise it is remote; for a remote metastore just configure hive.metastore.uris -->
<property>
<name>hive.metastore.uris</name>
<value>thrift://ddavb:9083</value>
<description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>ddavc</value>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<property>
<name>hive.server2.thrift.http.port</name>
<value>10001</value>
</property>
<property>
<name>hive.server2.thrift.http.path</name>
<value>cliservice</value>
</property>
<!-- HiveServer2 web UI -->
<property>
<name>hive.server2.webui.host</name>
<value>ddavc</value>
</property>
<property>
<name>hive.server2.webui.port</name>
<value>10002</value>
</property>
<property>
<name>hive.scratch.dir.permission</name>
<value>755</value>
</property>
<!-- If the jar path in hive.aux.jars.path below points to the local filesystem, remember to prefix it with file://, otherwise it will not be found and an org.apache.hadoop.hive.contrib.serde2.RegexSerDe error will be thrown -->
<!--
<property>
<name>hive.aux.jars.path</name>
<value>file:///home/centos/soft/spark/lib/spark-assembly-1.6.0-hadoop2.6.0.jar</value>
</property>
-->
<property>
<name>hive.server2.enable.doAs</name>
<value>false</value>
</property>
<property>
<name>hive.auto.convert.join</name>
<value>false</value>
</property>
<property>
<name>spark.dynamicAllocation.enabled</name>
<value>true</value>
<description>Enable dynamic resource allocation</description>
</property>
<!-- When using Hive on Spark, an out-of-memory error may occur if the following is not set -->
<!--
<property>
<name>spark.driver.extraJavaOptions</name>
<value>-XX:PermSize=128M -XX:MaxPermSize=512M</value>
</property>
-->
<!-- Disable metastore event DB notification API authorization -->
<property>
<name>hive.metastore.event.db.notification.api.auth</name>
<value>false</value>
</property>
<!-- Disable metastore schema version verification -->
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
</configuration>
Notes:
‒ Pay close attention to the database settings in this configuration file.
‒ If MySQL 8 is used, replace the MySQL connector jar and change com.mysql.jdbc.Driver to com.mysql.cj.jdbc.Driver.
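For reference, a hedged example of the two changes for MySQL 8 (the jar version below is illustrative, not the exact file you will have):
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
</property>
<!-- and copy a MySQL 8 connector jar, e.g. mysql-connector-java-8.0.x.jar, into /ddhome/bin/hive/lib/ instead of the 5.1.37 jar -->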
Sync the Hadoop configuration files
cd /ddhome/bin/hadoop/etc/hadoop
cp -r core-site.xml hdfs-site.xml yarn-site.xml /ddhome/bin/hive/conf/
Sync the MySQL dependency
cd /ddhome/tools/
cp mysql-connector-java-5.1.37.jar /ddhome/bin/hive/lib/
Create the user and database in MySQL
-- Create the database
create database if not exists hive;
-- Create the user
grant all privileges on hive.* to 'hive'@'%' identified by 'HzBaiyu0571!';
alter database hive character set latin1;
-- Reload the privilege tables
flush privileges;
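The GRANT ... IDENTIFIED BY form above works on MySQL 5.x only; MySQL 8 requires the user to be created first. A sketch of the equivalent statements for MySQL 8, reusing the same user and password:
-- MySQL 8: create the user first, then grant
create user if not exists 'hive'@'%' identified by 'HzBaiyu0571!';
grant all privileges on hive.* to 'hive'@'%';
flush privileges;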
Distribute the files
cd /ddhome/bin/
scp.sh hive
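scp.sh is a site-specific distribution helper that is not shown in this guide. A minimal sketch of what such a script might look like, with a hypothetical node list (replace the hosts with your own):
#!/bin/bash
# Hypothetical helper: copy the given directory to the same path on the other nodes
for host in ddavb ddavc; do
  scp -r "$1" ${host}:/ddhome/bin/
done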
Initialize the metastore schema
## Run the database initialization command on ddavc
schematool -initSchema -dbType mysql
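If the initialization succeeds, the metastore tables are created in the hive database on MySQL. A quick sanity check, assuming the mysql client is available on this node:
# List the metastore tables (TBLS, DBS, VERSION, etc. should appear)
mysql -uhive -p'HzBaiyu0571!' -h ddavc -e 'use hive; show tables;'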
Start the related processes
# Note: make sure the Hadoop processes are already running before starting the Hive processes
# Start the processes on ddavc
nohup hive --service metastore &
nohup hive --service hiveserver2 &
# 查看进程
ps -ef | grep metastore
ps -ef | grep hiveserver2
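Once both processes are up, HiveServer2 can be exercised with beeline; a minimal connectivity check (the user name is only illustrative, since hive.server2.enable.doAs is false):
beeline -u jdbc:hive2://ddavc:10000 -n hive -e 'show databases;'
# The HiveServer2 web UI configured above should also respond at http://ddavc:10002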
Hive on Spark Configuration
Prerequisite: the Spark package must be recompiled before being uploaded to the server.
Spark Installation
‒ Deploy Spark on all Hive nodes
‒ Upload and extract the installation package
cd /ddhome/tools/
tar -zxvf spark-3.0.0-bin-hadoop3.2.tgz -C /ddhome/bin/
cd /ddhome/bin/
mv spark-3.0.0-bin-hadoop3.2 spark
- Create Spark's configuration file in Hive's conf directory
cd /ddhome/bin/hive/conf/
touch spark-defaults.conf
## Add the following content (these parameters are applied when jobs are submitted)
spark.master yarn
spark.eventLog.enabled true
spark.eventLog.dir hdfs://masters/spark-history
spark.executor.memory 600m
spark.driver.memory 1g
- Create the directory for storing the history logs in HDFS
hadoop fs -mkdir /spark-history
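For the hive client to locate the Spark deployment above, SPARK_HOME is normally exported in the environment Hive runs in; a minimal sketch, again assuming /etc/profile is used (hive-env.sh works as well):
# Append to /etc/profile, then reload
export SPARK_HOME=/ddhome/bin/spark
export PATH=$PATH:$SPARK_HOME/bin
source /etc/profile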
Upload the pure (without-Hadoop) Spark jars
cd /ddhome/tools/
tar -zxvf spark-3.0.0-bin-without-hadoop.tgz -C ./
hadoop fs -mkdir /spark-jars
hadoop fs -put spark-3.0.0-bin-without-hadoop/jars/* /spark-jars
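A quick check that the jars actually landed in HDFS:
# Should list the uploaded Spark jars
hadoop fs -ls /spark-jars | head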
Modify the hive-site.xml configuration file
<!-- Spark dependency location (note: if a port such as 9000 is specified, it must match the NameNode port) -->
<property>
<name>spark.yarn.jars</name>
<value>hdfs://masters/spark-jars/*</value>
</property>
<!-- Hive execution engine -->
<property>
<name>hive.execution.engine</name>
<value>spark</value>
</property>
Restart the Hive-related processes
Kill the related RunJar processes, then start the metastore and HiveServer2 again
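A sketch of the restart, reusing the earlier start commands (check the PIDs before killing; the placeholders below are not real values):
# Find the RunJar processes backing the metastore and HiveServer2
ps -ef | grep RunJar
kill <metastore_pid> <hiveserver2_pid>
# Start them again on ddavc
nohup hive --service metastore &
nohup hive --service hiveserver2 &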
Verification
create table student(id int, name string);
insert into table student values(1,'abc');
-- If output such as "Hive on Spark Job" appears, the configuration is successful
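A follow-up aggregation also forces a job, which is a convenient way to watch the Spark stages run a second time:
select count(*) from student;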