
Hive Cluster Installation and Deployment

Upload and extract the installation package

cd /ddhome/tools 
tar -zxvf apache-hive-3.1.2-bin.tar.gz -C /ddhome/bin/
cd /ddhome/bin/ 
mv apache-hive-3.1.2-bin hive
  • Note: if Hive is to use Spark as its execution engine, Hive must be recompiled against that Spark version; the package used here has already been recompiled.
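An optional sanity check that the unpacked build is usable (this assumes Hadoop is already installed and configured, since the hive script depends on it):

# Print the Hive version to confirm the extraction and rename worked
/ddhome/bin/hive/bin/hive --version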

Edit the configuration file

cd /ddhome/bin/hive/conf
touch hive-site.xml
## Add the following content to hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<configuration>
<property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://ddavc:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;useUnicode=true&amp;characterEncoding=UTF-8</value>
    <description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
</property>
<property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
    <description>username to use against metastore database</description>
</property>
<property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>HzBaiyu0571!</value>
    <description>password to use against metastore database</description>
</property>
<property>
    <name>datanucleus.autoCreateSchema</name>
    <value>true</value>
</property>
<property>
    <name>datanucleus.autoCreateTables</name>
    <value>true</value>
</property>
<property>
    <name>datanucleus.autoCreateColumns</name>
    <value>true</value>
</property>
<!-- Location of the Hive warehouse on HDFS -->
<property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive/warehouse</value>
    <description>location of default database for the warehouse</description>
</property>
<!-- Local directory for temporary resource files -->
<property>
    <name>hive.downloaded.resources.dir</name>
    <value>/ddhome/bin/hive/tmp/resources</value>
    <description>Temporary local directory for added resources in the remote file system.</description>
 </property>
 <!-- Before Hive 0.9, hive.exec.dynamic.partition had to be set to true explicitly; since 0.9 it defaults to true -->
<property>
    <name>hive.exec.dynamic.partition</name>
    <value>true</value>
 </property>
<property>
    <name>hive.exec.dynamic.partition.mode</name>
    <value>nonstrict</value>
 </property>
<!-- Log locations -->
<property>
    <name>hive.exec.local.scratchdir</name>
    <value>/ddhome/bin/hive/tmp/logs</value>
    <description>Local scratch space for Hive jobs</description>
</property>
<property>
    <name>hive.querylog.location</name>
    <value>/ddhome/bin/hive/tmp/logs</value>
    <description>Location of Hive run time structured log file</description>
</property>
<property>
    <name>hive.server2.logging.operation.log.location</name>
    <value>/ddhome/bin/hive/tmp/logs</value>
    <description>Top level directory where operation tmp are stored if logging functionality is enabled</description>
</property>
<!-- HiveServer2 no longer needs the hive.metastore.local setting: if hive.metastore.uris is empty the metastore is local; otherwise it is remote, and setting hive.metastore.uris is all that is required -->
<property>
    <name>hive.metastore.uris</name>
    <value>thrift://ddavb:9083</value>
    <description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
</property>
<property>
    <name>hive.server2.thrift.bind.host</name>
    <value>ddavc</value>
</property>
<property>
    <name>hive.server2.thrift.port</name>
    <value>10000</value>
</property>
<property>
    <name>hive.server2.thrift.http.port</name>
    <value>10001</value>
</property>
<property>
    <name>hive.server2.thrift.http.path</name>
    <value>cliservice</value>
</property>
<!-- HiveServer2 web UI -->
<property>
    <name>hive.server2.webui.host</name>
    <value>ddavc</value>
</property>
<property>
    <name>hive.server2.webui.port</name>
    <value>10002</value>
</property>
<property>
    <name>hive.scratch.dir.permission</name>
    <value>755</value>
</property>
<!-- If the jar referenced by hive.aux.jars.path is a local file, the path must be prefixed with file://, otherwise it will not be found and an org.apache.hadoop.hive.contrib.serde2.RegexSerDe error is raised -->
<!--
<property>
    <name>hive.aux.jars.path</name>
    <value>file:///home/centos/soft/spark/lib/spark-assembly-1.6.0-hadoop2.6.0.jar</value>
</property>
-->
<property>
    <name>hive.server2.enable.doAs</name>
    <value>false</value>
</property>   
<property>
    <name>hive.auto.convert.join</name>
    <value>false</value>
</property>
<property>
    <name>spark.dynamicAllocation.enabled</name>
    <value>true</value>
    <description>Enable dynamic resource allocation</description>
</property>
<!-- With Hive on Spark, an out-of-memory error can occur if the following is not set -->
<!--
<property>
    <name>spark.driver.extraJavaOptions</name>
    <value>-XX:PermSize=128M -XX:MaxPermSize=512M</value>
</property>
-->
<!-- Disable metastore event DB notification API authorization -->
<property>
    <name>hive.metastore.event.db.notification.api.auth</name>
    <value>false</value>
</property>

<!-- Disable metastore schema version verification -->
<property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
</property>
</configuration>
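Before moving on, it is worth checking that the file is well-formed XML, since a stray character in hive-site.xml is a common cause of startup failures. This assumes xmllint (from libxml2) is available:

# Validate that hive-site.xml parses as well-formed XML
xmllint --noout /ddhome/bin/hive/conf/hive-site.xml && echo "hive-site.xml OK"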

Notes:
‒ Double-check the database connection settings (URL, user, password) in this configuration file
‒ If you are using MySQL 8, replace the MySQL connector jar and change com.mysql.jdbc.Driver to com.mysql.cj.jdbc.Driver (see the sketch below)
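A minimal sketch of that MySQL 8 swap; the 8.x jar name below is an assumption and depends on the exact version you download:

# Replace the 5.x connector with an 8.x one (jar name is hypothetical)
rm -f /ddhome/bin/hive/lib/mysql-connector-java-5.1.37.jar
cp /ddhome/tools/mysql-connector-java-8.0.28.jar /ddhome/bin/hive/lib/
# Then set javax.jdo.option.ConnectionDriverName to com.mysql.cj.jdbc.Driver
# in hive-site.xml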

Sync the Hadoop configuration files

cd /ddhome/bin/hadoop/etc/hadoop
cp -r core-site.xml hdfs-site.xml yarn-site.xml /ddhome/bin/hive/conf/

Copy the MySQL driver dependency

cd /ddhome/tools/
cp mysql-connector-java-5.1.37.jar /ddhome/bin/hive/lib/

Create the user and database in MySQL

-- Create the database
create database if not exists hive;
-- Create the user and grant privileges
grant all privileges on hive.* to 'hive'@'%' identified by 'HzBaiyu0571!';
alter database hive character set latin1;
-- Reload the privilege tables
flush privileges;
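To verify the account works, try connecting with the new credentials from a Hive node (this assumes the mysql client is installed; ddavc is the MySQL host from the JDBC URL above):

# Should list the hive database without an access error
mysql -h ddavc -u hive -p'HzBaiyu0571!' -e 'SHOW DATABASES;'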

Distribute the files

cd /ddhome/bin/
scp.sh hive
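scp.sh is a site-specific distribution script that is not shown in this article; a minimal sketch of what such a script typically does, with host names taken from this cluster as placeholders, might look like:

#!/bin/bash
# Hypothetical distribution script: copies the given directory under
# /ddhome/bin/ to the other cluster nodes. Adjust the node list to
# match your own cluster.
for host in ddavb ddavc; do
    scp -r "/ddhome/bin/$1" "${host}:/ddhome/bin/"
done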

Initialize the metastore schema

## Run the schema initialization command on ddavc
schematool -initSchema -dbType mysql
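If the initialization succeeded, schematool can also report the schema version it created, which is a quick way to verify the metastore database:

# Confirm the metastore schema exists and print its version
schematool -dbType mysql -info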

Start the services

# Note: make sure the Hadoop processes are already running before starting the Hive processes
# Start the following on ddavc
nohup hive --service metastore &
nohup hive --service hiveserver2 &
# 查看进程
ps -ef | grep metastore
ps -ef | grep hiveserver2 
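Process checks aside, a more direct test is to connect through HiveServer2 on the port configured above (10000); this assumes the Hive client tools are on the PATH:

# Connect via beeline; a working HiveServer2 returns the database list
beeline -u jdbc:hive2://ddavc:10000 -e 'show databases;'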

Hive on Spark Configuration

Prerequisite: the Spark distribution must be recompiled before being uploaded to the server

Install Spark

‒ Deploy Spark on all Hive nodes
‒ Upload and extract the installation package

cd /ddhome/tools/
tar -zxvf spark-3.0.0-bin-hadoop3.2.tgz -C /ddhome/bin/
cd /ddhome/bin/
mv spark-3.0.0-bin-hadoop3.2 spark
  • Create the Spark configuration file in the Hive conf directory
cd /ddhome/bin/hive/conf/
touch spark-defaults.conf
## Add the following content (these parameters are applied when jobs are executed)
spark.master              yarn
spark.eventLog.enabled    true
spark.eventLog.dir        hdfs://masters/spark-history
spark.executor.memory     600m
spark.driver.memory       1g
  • Create the directory in HDFS for storing the history logs
hadoop fs -mkdir /spark-history
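Note that hdfs://masters in spark.eventLog.dir refers to an HDFS nameservice rather than a host:port; it must match fs.defaultFS from core-site.xml, which can be verified with:

# Should print hdfs://masters, matching the prefix used in spark-defaults.conf
hdfs getconf -confKey fs.defaultFS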

Upload the pure (without-Hadoop) Spark jars

cd /ddhome/tools/
tar -zxvf spark-3.0.0-bin-without-hadoop.tgz
hadoop fs -mkdir /spark-jars
hadoop fs -put spark-3.0.0-bin-without-hadoop/jars/* /spark-jars
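A quick, optional check that the upload completed (a full Spark jars directory contains well over a hundred jars):

# Count the jars now stored in HDFS
hadoop fs -ls /spark-jars | wc -l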

Update the hive-site.xml configuration file

<!-- Location of the Spark dependencies (note: if a port such as 9000 appears here, it must match the NameNode port) -->
<property> 
  <name>spark.yarn.jars</name> 
  <value>hdfs://masters/spark-jars/*</value>
</property>
<!-- Hive execution engine -->
<property> 
  <name>hive.execution.engine</name> 
  <value>spark</value>
</property>

Restart the Hive processes

Kill the RunJar processes (both the metastore and HiveServer2 run as RunJar), then start them again.
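A minimal sketch of that restart, assuming the metastore and HiveServer2 are the only RunJar processes on this node:

# Kill the RunJar processes (metastore and hiveserver2 both appear as RunJar)
jps | grep RunJar | awk '{print $1}' | xargs kill
# Start them again
nohup hive --service metastore &
nohup hive --service hiveserver2 &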

Verification

create table student(id int, name string);
insert into table student values(1,'abc');
-- If the output contains text like "Hive on Spark job", the configuration is working
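You can also check from the YARN side: while the session is alive, the Spark application submitted by Hive should appear in the running application list (its name typically mentions Hive on Spark):

# The Hive on Spark session should show up as a RUNNING application
yarn application -list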
