# Backup
[hadoop@file1 tools]$ mysqldump -uroot -p hive >hive1.2.1-20160413.backup.sql
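# Not run here: if the upgrade ever needs to be rolled back, restoring the dump would look roughly like
# mysql -uroot -p hive < hive1.2.1-20160413.backup.sql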
# Directory layout after the packages are in place
[hadoop@file1 ~]$ ll
total 20
drwxrwxr-x 3 hadoop hadoop 4096 04-13 11:59 collect
drwx------ 3 hadoop hadoop 4096 04-07 16:43 dfs
lrwxrwxrwx 1 hadoop hadoop 18 04-11 10:09 hadoop -> tools/hadoop-2.6.3
lrwxrwxrwx 1 hadoop hadoop 40 04-13 10:26 hive -> /home/hadoop/tools/apache-hive-2.0.0-bin
lrwxrwxrwx 1 hadoop hadoop 42 04-13 10:52 spark -> tools/spark-1.6.0-bin-hadoop2-without-hive
drwxrwxr-x 6 hadoop hadoop 4096 04-13 12:10 tmp
drwxrwxr-x 9 hadoop hadoop 4096 04-13 11:48 tools
[hadoop@file1 tools]$ ll
total 84
drwxrwxr-x 8 hadoop hadoop 4096 04-08 09:25 apache-hive-1.2.1-bin
drwxrwxr-x 8 hadoop hadoop 4096 04-13 10:16 apache-hive-2.0.0-bin
drwxr-xr-x 11 hadoop hadoop 4096 04-07 16:34 hadoop-2.6.3
-rw-rw-r-- 1 hadoop hadoop 46879 04-13 10:11 hive1.2.1-20160413.backup.sql
drwxrwxr-x 2 hadoop hadoop 4096 03-31 15:28 mysql
lrwxrwxrwx 1 hadoop hadoop 36 04-13 10:17 spark -> spark-1.6.0-bin-hadoop2-without-hive
drwxrwxr-x 11 hadoop hadoop 4096 04-07 18:23 spark-1.3.1-bin-hadoop2.6.3-without-hive
drwxrwxr-x 11 hadoop hadoop 4096 03-28 11:15 spark-1.6.0-bin-hadoop2-without-hive
drwxr-xr-x 11 hadoop hadoop 4096 03-31 16:14 zookeeper-3.4.6
# My environment variables point at the symlinks, so here I only need to repoint the symlinks; adjust for your own setup.
# Create a spark symlink next to apache-hive-2.0.0-bin, or set SPARK_HOME in hive-env.sh (a sketch follows below).
# hive-1.2.1 does not ship the txn tables, so hive-txn-schema-2.0.0.mysql.sql has to be run separately first,
# and only then the upgrade script (the Duplicate column errors later on are expected and harmless).
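# If you take the hive-env.sh route instead of the symlink, the line would look roughly like this
# (the path is only assumed from the layout above; adjust to your install):
export SPARK_HOME=/home/hadoop/tools/spark-1.6.0-bin-hadoop2-without-hive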
[hadoop@file1 tools]$ cd apache-hive-2.0.0-bin/scripts/metastore/upgrade/mysql/
[hadoop@file1 mysql]$ mysql -uroot -p
Enter password:
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 10765
Server version: 5.5.48 MySQL Community Server (GPL)
Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql> use hive;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A
Database changed
mysql> source hive-txn-schema-2.0.0.mysql.sql
Query OK, 0 rows affected (0.01 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.04 sec)
Query OK, 0 rows affected (0.03 sec)
Query OK, 1 row affected (0.04 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.03 sec)
Records: 0 Duplicates: 0 Warnings: 0
Query OK, 0 rows affected (0.01 sec)
Query OK, 1 row affected (0.00 sec)
Query OK, 0 rows affected (0.01 sec)
Query OK, 0 rows affected (0.01 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 1 row affected (0.00 sec)
Query OK, 0 rows affected (0.01 sec)
mysql> source upgrade-1.2.0-to-2.0.0.mysql.sql
+------------------------------------------------+
| |
+------------------------------------------------+
| Upgrading MetaStore schema from 1.2.0 to 2.0.0 |
+------------------------------------------------+
1 row in set, 1 warning (0.00 sec)
+---------------------------------------------------------------------------------------------------------------+
| |
+---------------------------------------------------------------------------------------------------------------+
| < HIVE-7018 Remove Table and Partition tables column LINK_TARGET_ID from Mysql for other DBs do not have it > |
+---------------------------------------------------------------------------------------------------------------+
1 row in set, 1 warning (0.00 sec)
Query OK, 0 rows affected, 1 warning (0.03 sec)
Query OK, 0 rows affected, 1 warning (0.00 sec)
Query OK, 0 rows affected, 1 warning (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
+---------------------------------+
| Completed remove LINK_TARGET_ID |
+---------------------------------+
| Completed remove LINK_TARGET_ID |
+---------------------------------+
1 row in set (0.02 sec)
Query OK, 0 rows affected (0.02 sec)
Query OK, 31 rows affected (0.01 sec)
Records: 31 Duplicates: 0 Warnings: 0
Query OK, 0 rows affected (0.05 sec)
Records: 0 Duplicates: 0 Warnings: 0
Query OK, 0 rows affected (0.02 sec)
Records: 0 Duplicates: 0 Warnings: 0
Query OK, 0 rows affected (0.00 sec)
Records: 0 Duplicates: 0 Warnings: 0
Query OK, 0 rows affected (0.03 sec)
Records: 0 Duplicates: 0 Warnings: 0
Query OK, 0 rows affected (0.00 sec)
Records: 0 Duplicates: 0 Warnings: 0
ERROR 1060 (42S21): Duplicate column name 'CQ_HIGHEST_TXN_ID'
ERROR 1060 (42S21): Duplicate column name 'CQ_META_INFO'
ERROR 1060 (42S21): Duplicate column name 'CQ_HADOOP_JOB_ID'
ERROR 1050 (42S01): Table 'COMPLETED_COMPACTIONS' already exists
ERROR 1060 (42S21): Duplicate column name 'TXN_AGENT_INFO'
ERROR 1060 (42S21): Duplicate column name 'TXN_HEARTBEAT_COUNT'
ERROR 1060 (42S21): Duplicate column name 'HL_HEARTBEAT_COUNT'
ERROR 1060 (42S21): Duplicate column name 'TXN_META_INFO'
ERROR 1060 (42S21): Duplicate column name 'HL_AGENT_INFO'
ERROR 1060 (42S21): Duplicate column name 'HL_BLOCKEDBY_EXT_ID'
ERROR 1060 (42S21): Duplicate column name 'HL_BLOCKEDBY_INT_ID'
ERROR 1050 (42S01): Table 'AUX_TABLE' already exists
Query OK, 1 row affected (0.01 sec)
Rows matched: 1 Changed: 1 Warnings: 0
+---------------------------------------------------------+
| |
+---------------------------------------------------------+
| Finished upgrading MetaStore schema from 1.2.0 to 2.0.0 |
+---------------------------------------------------------+
1 row in set, 1 warning (0.00 sec)
# Copy the original Hive configuration and dependency jars over
[hadoop@file1 mysql]$ cd ~/tools/apache-hive-2.0.0-bin/conf/
[hadoop@file1 conf]$ cp ~/tools/apache-hive-1.2.1-bin/conf/hive-site.xml ./
[hadoop@file1 conf]$ cp ~/tools/apache-hive-1.2.1-bin/conf/spark-defaults.conf ./
[hadoop@file1 conf]$ cp ~/tools/apache-hive-1.2.1-bin/conf/hive-env.sh ./
# When Spark is used, MaxPermSize needs to be increased
[hadoop@file1 hive]$ vi conf/hive-env.sh
export HADOOP_USER_CLASSPATH_FIRST=true
export HADOOP_OPTS="$HADOOP_OPTS -XX:MaxPermSize=256m"
[hadoop@file1 conf]$ cd ../lib/
[hadoop@file1 lib]$ cp ~/tools/apache-hive-1.2.1-bin/lib/mysql-connector-java-5.1.34.jar ./
# On CentOS 5 the following two jars have to be removed; on CentOS 6 this is not necessary
[hadoop@file1 apache-hive-2.0.0-bin]$ rm lib/hive-jdbc-2.0.0-standalone.jar
[hadoop@file1 apache-hive-2.0.0-bin]$ rm lib/snappy-java-1.0.5.jar
# Upgrade Spark to 1.6.0
# http://spark.apache.org/docs/latest/hadoop-provided.html
# http://stackoverflow.com/questions/30906412/noclassdeffounderror-com-apache-hadoop-fs-fsdatainputstream-when-execute-spark-s
[hadoop@file1 apache-hive-2.0.0-bin]$ cd ~/tools/spark-1.6.0-bin-hadoop2-without-hive/conf/
[hadoop@file1 conf]$ cp spark-env.sh.template spark-env.sh
[hadoop@file1 conf]$ vi spark-env.sh
HADOOP_HOME=/home/hadoop/hadoop
SPARK_DIST_CLASSPATH=`$HADOOP_HOME/bin/hadoop classpath`
[hadoop@file1 ~]$ cp ~/tools/spark-1.6.0-bin-hadoop2-without-hive/lib/spark-1.6.0-yarn-shuffle.jar ~/tools/hadoop-2.6.3/share/hadoop/yarn/
[hadoop@file1 ~]$ rm ~/tools/hadoop-2.6.3/share/hadoop/yarn/spark-1.3.1-yarn-shuffle.jar
[hadoop@file1 ~]$ rsync -vaz --delete ~/tools/hadoop-2.6.3/share file2:~/tools/hadoop-2.6.3/
[hadoop@file1 ~]$ rsync -vaz --delete ~/tools/hadoop-2.6.3/share file3:~/tools/hadoop-2.6.3/
[hadoop@file1 ~]$ hdfs dfs -put ~/tools/spark-1.6.0-bin-hadoop2-without-hive/lib/spark-assembly-1.6.0-hadoop2.6.3.jar /spark/
[hadoop@file1 apache-hive-2.0.0-bin]$ vi conf/spark-defaults.conf
spark.yarn.jar hdfs:///spark/spark-assembly-1.6.0-hadoop2.6.3.jar
# Restart YARN (if you use hiveserver2, read on first; the configuration will be changed and YARN restarted again later)
[hadoop@file1 apache-hive-2.0.0-bin]$ cd ~/tools/hadoop-2.6.3/
[hadoop@file1 hadoop-2.6.3]$ sbin/stop-yarn.sh
[hadoop@file1 hadoop-2.6.3]$ sbin/start-yarn.sh
At this point the upgrade is far enough that the hive CLI works fine, but hiveserver2 still has problems.
# Start hiveserver2
[hadoop@file1 hive]$ nohup bin/hiveserver2 &
# Start the Spark history server
[hadoop@file1 spark]$ cat start-historyserver.sh
source $HADOOP_HOME/libexec/hadoop-config.sh
sbin/start-history-server.sh hdfs:///spark-eventlogs
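For the history server to have anything to show, Spark event logging must write to the same HDFS path. A sketch of the matching lines in conf/spark-defaults.conf, assuming the path from the script above (these are standard Spark 1.6 property names):
spark.eventLog.enabled           true
spark.eventLog.dir               hdfs:///spark-eventlogs
spark.history.fs.logDirectory    hdfs:///spark-eventlogs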
[hadoop@file1 hive]$ bin/beeline -u jdbc:hive2://file1:10000/ -n hadoop -p hadoop
which: no hbase in (/home/hadoop/hadoop/bin:/home/hadoop/hive/bin:/opt/jdk1.7.0_60/bin:/usr/kerberos/bin:/usr/local/bin:/bin:/usr/bin:/home/hadoop/tools/hadoop-2.6.3/bin:/home/hadoop/tools/hadoop-2.6.3:/home/hadoop/tools/apache-hive-1.2.1-bin:/home/hadoop/bin)
ls: /home/hadoop/hive/lib/hive-jdbc-*-standalone.jar: No such file or directory
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/home/hadoop/tools/apache-hive-2.0.0-bin/lib/log4j-slf4j-impl-2.4.1.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/home/hadoop/tools/hadoop-2.6.3/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Connecting to jdbc:hive2://file1:10000/
Error: Failed to open new session: java.lang.RuntimeException: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.authorize.AuthorizationException): User: hadoop is not allowed to impersonate hadoop (state=,code=0)
Beeline version 2.0.0 by Apache Hive
beeline>
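The impersonation error is the configuration change and restart hinted at above: HiveServer2 runs queries as the connecting user by default, so Hadoop has to be told that the hadoop user may act as a proxy. A sketch of the usual fix, assuming hadoop is the service user: add the following to core-site.xml on the cluster and restart (or refresh) HDFS and YARN; alternatively, set hive.server2.enable.doAs=false in hive-site.xml.
<property>
  <name>hadoop.proxyuser.hadoop.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.hadoop.groups</name>
  <value>*</value>
</property>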
[root@localhost snappy-1.1.3]# ./autogen.sh
Remember to add `AC_PROG_LIBTOOL' to `configure.ac'.
You should update your `aclocal.m4' by running aclocal.
libtoolize: `config.guess' exists: use `--force' to overwrite
libtoolize: `config.sub' exists: use `--force' to overwrite
libtoolize: `ltmain.sh' exists: use `--force' to overwrite
Makefile.am:4: Libtool library used but `LIBTOOL' is undefined
Makefile.am:4:
Makefile.am:4: The usual way to define `LIBTOOL' is to add `AC_PROG_LIBTOOL'
Makefile.am:4: to `configure.ac' and run `aclocal' and `autoconf' again.
Makefile.am:20: `dist_doc_DATA' is used but `docdir' is undefined
The Spark docs do not mention any extra configuration for snappy (spark.io.compression.codec already defaults to snappy). After deployment, setting hive.execution.engine=spark and running a Spark query failed immediately with: Caused by: java.lang.UnsatisfiedLinkError: /tmp/snappy-1.0.5-libsnappyjava.so: /usr/lib64/libstdc++.so.6: version `GLIBCXX_3.4.9' not found (required by /tmp/snappy-1.0.5-libsnappyjava.so). Judging from the stack trace this has nothing to do with hadoop-native-snappy; it comes from the snappy-java package.
- 16/04/12 20:20:08 INFO storage.BlockManagerMaster: Registered BlockManager
- java.lang.reflect.InvocationTargetException
- at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
- at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
- at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
- at java.lang.reflect.Method.invoke(Method.java:606)
- at org.xerial.snappy.SnappyLoader.loadNativeLibrary(SnappyLoader.java:322)
- at org.xerial.snappy.SnappyLoader.load(SnappyLoader.java:229)
- at org.xerial.snappy.Snappy.<clinit>(Snappy.java:48)
- at org.apache.spark.io.SnappyCompressionCodec.<init>(CompressionCodec.scala:150)
- at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
- at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
- at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
- at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
- at org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:68)
- at org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:60)
- at org.apache.spark.scheduler.EventLoggingListener.<init>(EventLoggingListener.scala:67)
- at org.apache.spark.SparkContext.<init>(SparkContext.scala:400)
- at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:61)
- at org.apache.hive.spark.client.RemoteDriver.<init>(RemoteDriver.java:169)
- at org.apache.hive.spark.client.RemoteDriver.main(RemoteDriver.java:556)
- at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
- at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
- at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
- at java.lang.reflect.Method.invoke(Method.java:606)
- at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:569)
- at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:166)
- at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:189)
- at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:110)
- at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
- Caused by: java.lang.UnsatisfiedLinkError: /tmp/snappy-1.0.5-libsnappyjava.so: /usr/lib64/libstdc++.so.6: version `GLIBCXX_3.4.9' not found (required by /tmp/snappy-1.0.5-libs
- at java.lang.ClassLoader$NativeLibrary.load(Native Method)
- at java.lang.ClassLoader.loadLibrary1(ClassLoader.java:1965)
- at java.lang.ClassLoader.loadLibrary0(ClassLoader.java:1890)
- at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1851)
- at java.lang.Runtime.load0(Runtime.java:795)
- at java.lang.System.load(System.java:1062)
- at org.xerial.snappy.SnappyNativeLoader.load(SnappyNativeLoader.java:39)
- ... 28 more
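The fix attempted here appears to be rebuilding snappy from source (the autogen.sh output above) and removing the bundled snappy-java-1.0.5.jar from hive/lib. If touching native libraries is not an option, an untested alternative is to steer Spark away from the snappy codec in conf/spark-defaults.conf:
spark.io.compression.codec    lz4
# or disable event-log compression, which is what appears to instantiate the codec in this stack trace
spark.eventLog.compress       false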
[hadoop@file1 ~]$ hive
Logging initialized using configuration in file:/home/hadoop/tools/apache-hive-1.2.1-bin/conf/hive-log4j.properties
hive> set hive.execution.engine=spark;
hive> select count(*) from t_info where edate=20160411;
Query ID = hadoop_20160412205338_2c95c5fd-af50-42ba-8681-e154e4b74cb1
Total jobs = 1
Launching Job 1 out of 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Spark Job = 69afc030-fa1f-4fdf-81ef-12bdca411a4f
Query Hive on Spark job[0] stages:
0
1
Status: Running (Hive on Spark job[0])
Job Progress Format
CurrentTime StageId_StageAttemptId: SucceededTasksCount(+RunningTasksCount-FailedTasksCount)/TotalTasksCount [StageCost]
2016-04-12 20:54:11,367 Stage-0_0: 0(+2)/234 Stage-1_0: 0/1
2016-04-12 20:54:14,421 Stage-0_0: 0(+2)/234 Stage-1_0: 0/1
2016-04-12 20:54:17,457 Stage-0_0: 0(+2)/234 Stage-1_0: 0/1
2016-04-12 20:54:19,486 Stage-0_0: 2(+2)/234 Stage-1_0: 0/1
2016-04-12 20:54:20,497 Stage-0_0: 3(+2)/234 Stage-1_0: 0/1
2016-04-12 20:54:21,509 Stage-0_0: 5(+2)/234 Stage-1_0: 0/1
2016-04-12 20:54:22,520 Stage-0_0: 6(+2)/234 Stage-1_0: 0/1
2016-04-12 20:54:23,532 Stage-0_0: 7(+2)/234 Stage-1_0: 0/1
bash-4.1# puppet agent -t
Info: Creating a new SSL key for 5a56be361905.localdomain
Info: Caching certificate for ca
Info: csr_attributes file loading from /etc/puppetlabs/puppet/csr_attributes.yaml
Info: Creating a new SSL certificate request for 5a56be361905.localdomain
Info: Certificate Request fingerprint (SHA256): 58:1A:2E:28:D3:D7:C5:7B:E3:1A:C2:0F:70:D0:46:C0:34:39:7F:EC:98:65:B1:09:96:D3:4B:A7:4B:32:A6:C6
Info: Caching certificate for ca
Exiting; no certificate found and waitforcert is disabled
# On the master: list and sign the certificate requests
bash-4.1# puppet cert list
"5a56be361905.localdomain" (SHA256) 58:1A:2E:28:D3:D7:C5:7B:E3:1A:C2:0F:70:D0:46:C0:34:39:7F:EC:98:65:B1:09:96:D3:4B:A7:4B:32:A6:C6
"6516b8d0538b.localdomain" (SHA256) F7:49:CC:93:EA:5D:D9:A2:90:33:01:A9:74:86:97:0C:20:0C:EB:24:3A:13:85:64:5C:32:A8:D7:36:91:3C:77
bash-4.1# puppet cert sign --all
Notice: Signed certificate request for 6516b8d0538b.localdomain
Notice: Removing file Puppet::SSL::CertificateRequest 6516b8d0538b.localdomain at '/etc/puppetlabs/puppet/ssl/ca/requests/6516b8d0538b.localdomain.pem'
Notice: Signed certificate request for 5a56be361905.localdomain
Notice: Removing file Puppet::SSL::CertificateRequest 5a56be361905.localdomain at '/etc/puppetlabs/puppet/ssl/ca/requests/5a56be361905.localdomain.pem'
# Run the agent again
bash-4.1# puppet agent -t
Info: Caching certificate for 5a56be361905.localdomain
Info: Caching certificate_revocation_list for ca
Info: Caching certificate for 5a56be361905.localdomain
Info: Using configured environment 'production'
Info: Retrieving pluginfacts
Info: Retrieving plugin
Info: Caching catalog for 5a56be361905.localdomain
Info: Applying configuration version '1460222614'
Info: Creating state file /opt/puppetlabs/puppet/cache/state/state.yaml
Notice: Applied catalog in 0.02 seconds
Much of the material online is outdated, usually describing puppetmaster + apache/nginx + passenger. Newer releases ship puppetserver instead, which runs on the JVM (Puppet Server is hosted by a Jetty web server) and performs better than the old Ruby stack (at least that is what the official docs claim), so there is no need to bother with the Ruby pieces anymore.
Because Puppet Server runs on the JVM, it takes a bit longer than the Apache/Passenger stack to start and get ready to accept HTTP connections.
Overall, Puppet Server performance is significantly better than a Puppet master running on the Apache/Passenger stack, but the initial startup is definitely slower.