Technical Blog Post
Abstract
Port and Build MXNet on openPower System
Body
1 Install some official/third-party package dependencies
1.1 for Redhat Linux 7.2
yum install numpy.ppc64le
yum install protobuf-lite-static.ppc64le protobuf-lite.ppc64le protobuf-lite-devel.ppc64le
yum install gtest-devel.ppc64le gtest.ppc64le
yum install openmpi-devel.ppc64le
yum install opencv-devel.ppc64le
Install CUDA, and CUDNN
1.2 for Ubuntu
Please refer to the previous section (1.1) and install the corresponding packages for Ubuntu.
1.3 TODO
Please give feedback if I have missed any dependencies.
2 Clone the master branch from git repo
Use the command below to clone the source code.
git clone --recursive https://github.com/dmlc/mxnet.git -b master master
Then run "cd master".
All of the following operations are performed in the master directory.
3 makefile and source code change
Besides the main mxnet module, MXNet uses three submodules:
dmlc-core
mshadow
ps-lite
The diffs below are based on the following commits:
mxnet module:
commit 6f3845091a2ffe96bddc133d44f4cdcb278b81ba
dmlc-core submodule:
commit 39007ac49b6087339dc3104324cb4e0de47f1c5f
mshadow submodule:
commit f67e112dcfe7e054cb7866d289d8c826808bd359
ps-lite submodule:
commit 4a060e4e8aa40c3a931a0f8af9211279e012f8a2
3.1 Change for MXNet itself
3.1.1 Makefile
There is no need to pass "USE_SSE" to the "dmlc-core" submodule any more; CPU_ARCH is used instead to determine whether or not to use SSE.
diff --git a/Makefile b/Makefile
index 3ef5661..b768cd6 100644
--- a/Makefile
+++ b/Makefile
@@ -225,7 +225,7 @@ PSLITE:
$(DMLC_CORE)/libdmlc.a: DMLCCORE
DMLCCORE:
- + cd $(DMLC_CORE); make libdmlc.a USE_SSE=$(USE_SSE) config=$(ROOTDIR)/$(config); cd $(ROOTDIR)
+ + cd $(DMLC_CORE); make libdmlc.a config=$(ROOTDIR)/$(config); cd $(ROOTDIR)
bin/im2rec: tools/im2rec.cc $(ALL_DEP)
3.1.2 make/config.mk
Description of the change:
The CPU architecture needs to be detected so that "-msse" can be used for x86 and "-mvsx" can be used for OpenPOWER.
diff --git a/make/config.mk b/make/config.mk
index 44fa4d9..646beb3 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -24,6 +24,9 @@ export CC = gcc
export CXX = g++
export NVCC = nvcc
+# CPU architecture
+CPU_ARCH =
+
# whether compile with debug
DEBUG = 0
@@ -38,12 +41,12 @@ ADD_CFLAGS =
#---------------------------------------------
# whether use CUDA during compile
-USE_CUDA = 0
+USE_CUDA = 1
# add the path to CUDA library to link and compile flag
# if you have already add them to environment variable, leave it as NONE
# USE_CUDA_PATH = /usr/local/cuda
-USE_CUDA_PATH = NONE
+USE_CUDA_PATH = /usr/local/cuda
# whether use CuDNN R3 library
USE_CUDNN = 0
@@ -76,7 +79,7 @@ UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S), Darwin)
USE_BLAS = apple
else
-USE_BLAS = atlas
+USE_BLAS = openblas
endif
# add path to intel library, you may need it for MKL, if you did not add the path
@@ -94,10 +97,17 @@ endif
# Settings for power and arm arch
#----------------------------
ARCH := $(shell uname -a)
-ifneq (,$(filter $(ARCH), armv6l armv7l powerpc64le ppc64le aarch64))
- USE_SSE=0
-else
- USE_SSE=1
+CPU_ARCH =
+ifneq (,$(filter $(ARCH), powerpc64le ppc64le))
+ CPU_ARCH = POWER64
+else ifneq (,$(filter $(ARCH), armv6l armv7l))
+ CPU_ARCH = ARM
+else ifneq (,$(filter $(ARCH), aarch64))
+ CPU_ARCH = ARM64
+else ifneq (,$(filter $(ARCH), i386))
+ CPU_ARCH = X86
+else ifneq (,$(filter $(ARCH), x86_64))
+ CPU_ARCH = X86_64
endif
#----------------------------
@@ -105,7 +115,7 @@ endif
#----------------------------
# whether or not to enable multi-machine supporting
-USE_DIST_KVSTORE = 0
+USE_DIST_KVSTORE = 1
# whether or not allow to read and write HDFS directly. If yes, then hadoop is
# required
3.1.3 tests/nightly/download.sh
The folder layout of the web server has changed, but the download script has not been synchronized with it, so the script must be updated to match the new layout.
diff --git a/tests/nightly/download.sh b/tests/nightly/download.sh
index 4488829..daa30a2 100644
--- a/tests/nightly/download.sh
+++ b/tests/nightly/download.sh
@@ -1,20 +1,22 @@
#!/bin/bash
dmlc_download() {
- url=http://data.dmlc.ml/mxnet/datasets/
- dir=$1
- file=$2
- if [ ! -e data/${dir}/$file ]; then
- wget ${url}/${dir}/${file} -P data/${dir}/ || exit -1
+ url=http://data.dmlc.ml/mxnet/data/
+ file=$1
+ if [ ! -e data/$file ]; then
+ wget ${url}/${file} -P data/ || exit -1
else
- echo "data/${dir}/$file already exits"
+ echo "data/$file already exits"
fi
}
-dmlc_download mnist t10k-images-idx3-ubyte
-dmlc_download mnist t10k-labels-idx1-ubyte
-dmlc_download mnist train-images-idx3-ubyte
-dmlc_download mnist train-labels-idx1-ubyte
+dmlc_download mnist.zip
+dmlc_download cifar10.zip
-dmlc_download cifar10 train.rec
-dmlc_download cifar10 test.rec
+if [ ! -e data/cifar10 ]; then
+ cd data && unzip cifar10.zip && mv cifar cifar10 && cd ..
+fi
+
+if [ ! -e data/mnist ]; then
+ cd data && unzip mnist.zip -d mnist && cd ..
+fi
3.2 change for dmlc-core submodule
Under the dmlc-core folder, there are two changes:
Makefile
make/config.mk
3.2.1 Makefile
diff --git a/Makefile b/Makefile
index 3f7089c..cdb94a9 100644
--- a/Makefile
+++ b/Makefile
@@ -15,12 +15,13 @@ export CFLAGS = -O3 -Wall -Wno-unknown-pragmas -Iinclude -std=c++0x
LDFLAGS+= $(DMLC_LDFLAGS)
CFLAGS+= $(DMLC_CFLAGS)
-ifndef USE_SSE
- USE_SSE = 1
-endif
-
-ifeq ($(USE_SSE), 1)
+# need add support for other CPU architectures
+ifeq ($(CPU_ARCH), X86_64)
+ CFLAGS += -msse2
+else ifeq ($(CPU_ARCH), X86)
CFLAGS += -msse2
+else ifeq ($(CPU_ARCH), POWER64)
+ CFLAGS += -mvsx
endif
ifdef DEPS_PATH
3.2.2 make/config.mk
diff --git a/make/config.mk b/make/config.mk
index a33361e..2465bc7 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -39,3 +39,18 @@ GTEST_PATH=
# path to third-party dependences such as glog
DEPS_PATH=
+
+# detect CPU architecture
+ARCH := $(shell uname -a)
+CPU_ARCH =
+ifneq (,$(filter $(ARCH), powerpc64le ppc64le))
+ CPU_ARCH = POWER64
+else ifneq (,$(filter $(ARCH), armv6l armv7l))
+ CPU_ARCH = ARM
+else ifneq (,$(filter $(ARCH), aarch64))
+ CPU_ARCH = ARM64
+else ifneq (,$(filter $(ARCH), i386))
+ CPU_ARCH = X86
+else ifneq (,$(filter $(ARCH), x86_64))
+ CPU_ARCH = X86_64
+endif
3.3 changes for mshadow
Under the mshadow folder, there are two changes:
make/mshadow.mk
mshadow/base.h
3.3.1 make/mshadow.mk
diff --git a/make/mshadow.mk b/make/mshadow.mk
index 634f52b..6a81075 100644
--- a/make/mshadow.mk
+++ b/make/mshadow.mk
@@ -13,12 +13,12 @@ MSHADOW_LDFLAGS = -lm
MSHADOW_NVCCFLAGS =
MKLROOT =
-ifndef USE_SSE
- USE_SSE=1
-endif
-
-ifeq ($(USE_SSE), 1)
- MSHADOW_CFLAGS += -msse3
+ifeq ($(CPU_ARCH), X86_64)
+ MSHADOW_CFLAGS += -msse3 -DMSHADOW_USE_SSE=1
+else ifeq ($(CPU_ARCH), X86)
+ MSHADOW_CFLAGS += -msse3 -DMSHADOW_USE_SSE=1
+else ifeq ($(CPU_ARCH), POWER64)
+ MSHADOW_CFLAGS += -mvsx -DMSHADOW_USE_SSE=0
else
MSHADOW_CFLAGS += -DMSHADOW_USE_SSE=0
endif
3.3.2 mshadow/base.h
diff --git a/mshadow/base.h b/mshadow/base.h
index c2e65d1..953b7ea 100644
--- a/mshadow/base.h
+++ b/mshadow/base.h
@@ -117,10 +117,16 @@ typedef unsigned __int64 uint64_t;
__cplusplus >= 201103L || defined(_MSC_VER))
#endif
+#if defined(__x86_64__) || defined(__i386__)
/*! \brief whether use SSE */
#ifndef MSHADOW_USE_SSE
#define MSHADOW_USE_SSE 1
#endif
+#elif defined(__PPC64__)
+// Disbale SSE on Power systems
+#define MSHADOW_USE_SSE 0
+#endif
+
/*! \brief whether use NVML to get dynamic info */
#ifndef MSHADOW_USE_NVML
#define MSHADOW_USE_NVML 0
3.4 ps-lite
Under the ps-lite folder, there are three changes:
Makefile
make/deps.mk
make/ps.mk
3.4.1 Makefile
diff --git a/Makefile b/Makefile
index 5f4f6f7..54282eb 100644
--- a/Makefile
+++ b/Makefile
@@ -22,10 +22,10 @@ include make/ps.mk
INCPATH = -I./src -I./include -I$(DEPS_PATH)/include
+CPU_ARCH := $(shell uname -a)
ifneq (,$(filter $(CPU_ARCH), X86_64 X86))
CFLAGS = -std=c++11 -msse2 -fPIC -O3 -ggdb -Wall -finline-functions $(INCPATH) $(ADD_CFLAGS)
-endif
-ifneq (,$(filter $(CPU_ARCH), powerpc64le ppc64le))
+else ifneq (,$(filter $(CPU_ARCH), powerpc64le ppc64le))
CFLAGS = -std=c++11 -mvsx -fPIC -O3 -ggdb -Wall -finline-functions $(INCPATH) $(ADD_CFLAGS)
else
CFLAGS = -std=c++11 -fPIC -O3 -ggdb -Wall -finline-functions $(INCPATH) $(ADD_CFLAGS)
3.4.2 make/deps.mk
If the protoc command already exists on the system, the protobuf source code is not downloaded. This change is needed because the provided protobuf source code cannot be compiled on Power.
diff --git a/make/deps.mk b/make/deps.mk
index b83a143..e3c6830 100644
--- a/make/deps.mk
+++ b/make/deps.mk
@@ -6,8 +6,7 @@ WGET = wget
endif
# protobuf
-PROTOBUF = ${DEPS_PATH}/include/google/protobuf/message.h
-${PROTOBUF}:
+${PROTOC}:
$(eval FILE=protobuf-2.5.0.tar.gz)
$(eval DIR=protobuf-2.5.0)
rm -rf $(FILE) $(DIR)
3.4.3 make/ps.mk
In order to run mxnet in a distributed environment, the original design is to compile mxnet against some static libraries, such as libzmq and libprotobuf-lite.
diff --git a/make/ps.mk b/make/ps.mk
index 0b0f678..9bd02bb 100644
--- a/make/ps.mk
+++ b/make/ps.mk
@@ -5,9 +5,34 @@
#
#----------------------------------------------------------------------------------------
+OS_RELEASE = /etc/os-release
+OS_DIST =
+ifeq ($(OS_RELEASE), $(wildcard $(OS_RELEASE)))
+OS_RELEASE_CONTENT = $(shell cat $(OS_RELEASE))
+ ifeq (rhel, $(findstring rhel, $(OS_RELEASE_CONTENT)))
+ OS_DIST = rhel
+ endif
+
+ ifeq (ubuntu, $(findstring ubuntu,$(OS_RELEASE_CONTENT)))
+ OS_DIST = ubuntu
+ endif
+endif
+
ifeq ($(USE_KEY32), 1)
ADD_CFLAGS += -DUSE_KEY32=1
endif
-PS_LDFLAGS_SO = -L$(DEPS_PATH)/lib -lprotobuf-lite -lzmq
-PS_LDFLAGS_A = $(addprefix $(DEPS_PATH)/lib/, libprotobuf-lite.a libzmq.a)
+PS_LDFLAGS_SO = -L$(DEPS_PATH)/lib -lzmq -lprotobuf-lite
+
+PS_LDFLAGS_A = $(addprefix $(DEPS_PATH)/lib/, libzmq.a)
+ifeq ($(USE_SYSTEM_PROTOC), 0)
+PS_LDFLAGS_A += $(addprefix $(DEPS_PATH)/lib/, libprotobuf-lite.a)
+else
+ ifeq (rhel, $(OS_DIST))
+ PS_LDFLAGS_A += /lib64/libprotobuf-lite.a
+ else ifeq (ubuntu, $(OS_DIST))
+ PS_LDFLAGS_A += /usr/lib/powerpc64le-linux-gnu/libprotobuf-lite.a
+ else
+ PS_LDFLAGS_A += -lprotobuf-lite #use dynamic library for other platforms first
+ endif
+endif
4 Set the openBLAS header file directory in the environment before compiling the code
export C_INCLUDE_PATH=/usr/include/openblas/:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=/usr/include/openblas/:$CPLUS_INCLUDE_PATH
Other library paths may also be needed for different configurations, such as CUDA, cuDNN, OpenMP, and OpenCV.
5 compiling the source code
In the master folder, run "make" to compile the source code.
6 Testing the build
python example/image-classification/train_mnist.py
7 Run all tests
tests/nightly/test_all.sh
UID
ibm16170019