[esw@bigdatamgr1 spark-1.3.0-bin-2.2.0]$ cat conf/spark-defaults.conf
# spark.master spark://bigdatamgr1:7077,bigdata8:7077
# spark.eventLog.enabled true
# spark.eventLog.dir hdfs://namenode:8021/directory
# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
# spark.executor.extraJavaOptions -Xmx16g -Xms16g -Xmn256m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:ParallelGCThreads=10
spark.driver.memory 48g
spark.executor.memory 48g
spark.sql.shuffle.partitions 200
#spark.scheduler.mode FAIR
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.driver.maxResultSize 8g
#spark.kryoserializer.buffer.max.mb 2048
spark.dynamicAllocation.enabled true
spark.dynamicAllocation.minExecutors 4
spark.shuffle.service.enabled true
[esw@bigdatamgr1 conf]$ cat spark-env.sh
#!/usr/bin/env bash
# Spark environment configuration — sourced by Spark's launch scripts
# (bin/spark-class, sbin/start-*.sh), not executed directly.
# NOTE(review): JAVA_HOME is assigned but not exported; that works because
# this file is sourced into the launcher's shell, but confirm no child
# process relies on seeing JAVA_HOME in its environment.
JAVA_HOME=/home/esw/jdk1.7.0_60
# log4j
#######################################
# Append every *.jar under a directory to SPARK_DIST_CLASSPATH, skipping
# Hive's own hive-*.jar files (Spark ships its Hive classes; duplicates
# cause class conflicts).
# Arguments:
#   $1 - directory to scan for jar files
# Globals:
#   SPARK_DIST_CLASSPATH - read and re-exported with jars appended
#######################################
__add_to_classpath() {
  # 'local' keeps root/f from leaking into the shell that sources this file.
  local root=$1
  local f
  if [ -d "$root" ]; then
    # Glob instead of parsing `ls` output; quoted expansions are safe even
    # if the path contains spaces or glob characters.
    for f in "$root"/*.jar; do
      [ -e "$f" ] || continue                     # no match: glob stays literal
      case "$f" in
        */hive*.jar) continue ;;                  # same filter as grep -v -E '/hive.*.jar'
      esac
      if [ -n "${SPARK_DIST_CLASSPATH:-}" ]; then
        export SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:$f"
      else
        export SPARK_DIST_CLASSPATH="$f"
      fi
    done
  fi
}
# Append Hive's dependency jars (its own hive-*.jar files are filtered out
# by the helper) to the tail of SPARK_DIST_CLASSPATH.
__add_to_classpath "/home/esw/apache-hive-0.13.1/lib"

#export HADOOP_CONF_DIR=/data/opt/ibm/biginsights/hadoop-2.2.0/etc/hadoop
export HADOOP_CONF_DIR="/home/esw/hadoop-2.6.0/etc/hadoop"
export SPARK_CLASSPATH="${SPARK_CLASSPATH}:/home/esw/spark-1.3.0-bin-2.2.0/conf:${HADOOP_CONF_DIR}"

# Standby-master HA: master state is recovered from ZooKeeper on failover.
SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=bi-00-01.bi.domain.com:2181 -Dspark.deploy.zookeeper.dir=/spark"

# Keep daemon PID files under the install tree rather than the default /tmp.
SPARK_PID_DIR="${SPARK_HOME}/pids"
Synchronize the Hadoop distribution to all slave nodes:
for h in `cat slaves ` ; do rsync -vaz hadoop-2.6.0 $h:~/ --delete --exclude=work --exclude=logs --exclude=metastore_db --exclude=data --exclude=pids ; done