IBM Support

Port and Build MXNet on openPower System

Technical Blog Post


Abstract

Port and Build MXNet on openPower System

Body

1 Install some official/third-party package dependencies

1.1 for Redhat Linux 7.2

yum install numpy.ppc64le
yum install protobuf-lite-static.ppc64le protobuf-lite.ppc64le protobuf-lite-devel.ppc64le
yum install gtest-devel.ppc64le gtest.ppc64le

yum install openmpi-devel.ppc64le

yum install opencv-devel.ppc64le

Install CUDA, and CUDNN

1.2 for Ubuntu

Please refer to the previous section (1.1) and install the corresponding dependencies for Ubuntu.

1.3 TODO

Please give feedback if I have missed any dependencies.

2 Clone the master branch from git repo

Use the command below to clone the source code.

git clone --recursive https://github.com/dmlc/mxnet.git -b master master

Then cd into master.

All of the following operations are performed under the master directory.

 

3 makefile and source code change

Besides the main module mxnet, there are three submodules used by MXNet:

dmlc-core

mshadow

ps-lite

 

The diffs below are based on the following commits:

mxnet module:

commit 6f3845091a2ffe96bddc133d44f4cdcb278b81ba

dmlc-core submodule:

commit 39007ac49b6087339dc3104324cb4e0de47f1c5f

mshadow submodule:

commit f67e112dcfe7e054cb7866d289d8c826808bd359

ps-lite submodule:

commit 4a060e4e8aa40c3a931a0f8af9211279e012f8a2

3.1 Change for MXNet itself

3.1.1 Makefile

There is no need to pass "USE_SSE" to the submodule "dmlc-core"; CPU_ARCH is used instead to determine whether to use SSE or not.

 

diff --git a/Makefile b/Makefile

index 3ef5661..b768cd6 100644

--- a/Makefile

+++ b/Makefile

@@ -225,7 +225,7 @@ PSLITE:

 $(DMLC_CORE)/libdmlc.a: DMLCCORE

 

 DMLCCORE:

-       + cd $(DMLC_CORE); make libdmlc.a USE_SSE=$(USE_SSE) config=$(ROOTDIR)/$(config); cd $(ROOTDIR)

+       + cd $(DMLC_CORE); make libdmlc.a config=$(ROOTDIR)/$(config); cd $(ROOTDIR)

 

 bin/im2rec: tools/im2rec.cc $(ALL_DEP)

 

3.1.2 make/config.mk

description for the change:

The CPU architecture needs to be detected so that the SSE flags ("-msse2"/"-msse3") can be used for X86 and "-mvsx" can be used for openPower.

 

diff --git a/make/config.mk b/make/config.mk

index 44fa4d9..646beb3 100644

--- a/make/config.mk

+++ b/make/config.mk

@@ -24,6 +24,9 @@ export CC = gcc

 export CXX = g++

 export NVCC = nvcc

 

+# CPU architecture

+CPU_ARCH =

+

 # whether compile with debug

 DEBUG = 0

 

@@ -38,12 +41,12 @@ ADD_CFLAGS =

 #---------------------------------------------

 

 # whether use CUDA during compile

-USE_CUDA = 0

+USE_CUDA = 1

 

 # add the path to CUDA library to link and compile flag

 # if you have already add them to environment variable, leave it as NONE

 # USE_CUDA_PATH = /usr/local/cuda

-USE_CUDA_PATH = NONE

+USE_CUDA_PATH = /usr/local/cuda

 

 # whether use CuDNN R3 library

 USE_CUDNN = 0

@@ -76,7 +79,7 @@ UNAME_S := $(shell uname -s)

 ifeq ($(UNAME_S), Darwin)

 USE_BLAS = apple

 else

-USE_BLAS = atlas

+USE_BLAS = openblas

 endif

 

 # add path to intel library, you may need it for MKL, if you did not add the path

@@ -94,10 +97,17 @@ endif

 # Settings for power and arm arch

 #----------------------------

 ARCH := $(shell uname -a)

-ifneq (,$(filter $(ARCH), armv6l armv7l powerpc64le ppc64le aarch64))

-       USE_SSE=0

-else

-       USE_SSE=1

+CPU_ARCH =

+ifneq (,$(filter $(ARCH), powerpc64le ppc64le))

+       CPU_ARCH = POWER64

+else ifneq (,$(filter $(ARCH), armv6l armv7l))

+       CPU_ARCH = ARM

+else ifneq (,$(filter $(ARCH), aarch64))

+       CPU_ARCH = ARM64

+else ifneq (,$(filter $(ARCH), i386))

+       CPU_ARCH = X86

+else ifneq (,$(filter $(ARCH), x86_64))

+       CPU_ARCH = X86_64

 endif

 

 #----------------------------

@@ -105,7 +115,7 @@ endif

 #----------------------------

 

 # whether or not to enable multi-machine supporting

-USE_DIST_KVSTORE = 0

+USE_DIST_KVSTORE = 1

 

 # whether or not allow to read and write HDFS directly. If yes, then hadoop is

 # required

 

3.1.3 tests/nightly/download.sh

The folder layout of the web server has changed, so the download script is updated to stay synchronized with the web server.

 

diff --git a/tests/nightly/download.sh b/tests/nightly/download.sh

index 4488829..daa30a2 100644

--- a/tests/nightly/download.sh

+++ b/tests/nightly/download.sh

@@ -1,20 +1,22 @@

 #!/bin/bash

 

 dmlc_download() {

-    url=http://data.dmlc.ml/mxnet/datasets/

-    dir=$1

-    file=$2

-    if [ ! -e data/${dir}/$file ]; then

-        wget ${url}/${dir}/${file} -P data/${dir}/ || exit -1

+    url=http://data.dmlc.ml/mxnet/data/

+    file=$1

+    if [ ! -e data/$file ]; then

+        wget ${url}/${file} -P data/ || exit -1

     else

-        echo "data/${dir}/$file already exits"

+        echo "data/$file already exits"

     fi

 }

 

-dmlc_download mnist t10k-images-idx3-ubyte

-dmlc_download mnist t10k-labels-idx1-ubyte

-dmlc_download mnist train-images-idx3-ubyte

-dmlc_download mnist train-labels-idx1-ubyte

+dmlc_download mnist.zip

+dmlc_download cifar10.zip

 

-dmlc_download cifar10 train.rec

-dmlc_download cifar10 test.rec

+if [ ! -e data/cifar10 ]; then

+    cd data && unzip cifar10.zip && mv cifar cifar10 && cd ..

+fi

+

+if [ ! -e data/mnist ]; then

+    cd data && unzip mnist.zip -d mnist && cd ..

+fi

 

3.2 change for dmlc-core submodule

Under the dmlc-core folder, there are two changes:

Makefile

make/config.mk

 

3.2.1 Makefile

 

diff --git a/Makefile b/Makefile

index 3f7089c..cdb94a9 100644

--- a/Makefile

+++ b/Makefile

@@ -15,12 +15,13 @@ export CFLAGS = -O3 -Wall -Wno-unknown-pragmas -Iinclude  -std=c++0x

 LDFLAGS+= $(DMLC_LDFLAGS)

 CFLAGS+= $(DMLC_CFLAGS)

 

-ifndef USE_SSE

-       USE_SSE = 1

-endif

-

-ifeq ($(USE_SSE), 1)

+# need add support for other CPU architectures

+ifeq ($(CPU_ARCH), X86_64)

+       CFLAGS += -msse2

+else ifeq ($(CPU_ARCH), X86)

        CFLAGS += -msse2

+else ifeq ($(CPU_ARCH), POWER64)

+       CFLAGS += -mvsx

 endif

 

 ifdef DEPS_PATH

 

3.2.2 make/config.mk

diff --git a/make/config.mk b/make/config.mk

index a33361e..2465bc7 100644

--- a/make/config.mk

+++ b/make/config.mk

@@ -39,3 +39,18 @@ GTEST_PATH=

 

 # path to third-party dependences such as glog

 DEPS_PATH=

+

+# detect CPU architecture

+ARCH := $(shell uname -a)

+CPU_ARCH =

+ifneq (,$(filter $(ARCH), powerpc64le ppc64le))

+       CPU_ARCH = POWER64

+else ifneq (,$(filter $(ARCH), armv6l armv7l))

+       CPU_ARCH = ARM

+else ifneq (,$(filter $(ARCH), aarch64))

+       CPU_ARCH = ARM64

+else ifneq (,$(filter $(ARCH), i386))

+       CPU_ARCH = X86

+else ifneq (,$(filter $(ARCH), x86_64))

+       CPU_ARCH = X86_64

+endif

 

3.3 changes for mshadow

Under the mshadow folder, there are two changes:

make/mshadow.mk

mshadow/base.h

3.3.1 make/mshadow.mk

diff --git a/make/mshadow.mk b/make/mshadow.mk

index 634f52b..6a81075 100644

--- a/make/mshadow.mk

+++ b/make/mshadow.mk

@@ -13,12 +13,12 @@ MSHADOW_LDFLAGS = -lm

 MSHADOW_NVCCFLAGS =

 MKLROOT =

 

-ifndef USE_SSE

-       USE_SSE=1

-endif

-

-ifeq ($(USE_SSE), 1)

-       MSHADOW_CFLAGS += -msse3

+ifeq ($(CPU_ARCH), X86_64)

+       MSHADOW_CFLAGS += -msse3 -DMSHADOW_USE_SSE=1

+else ifeq ($(CPU_ARCH), X86)

+       MSHADOW_CFLAGS += -msse3 -DMSHADOW_USE_SSE=1

+else ifeq ($(CPU_ARCH), POWER64)

+       MSHADOW_CFLAGS += -mvsx -DMSHADOW_USE_SSE=0

 else

        MSHADOW_CFLAGS += -DMSHADOW_USE_SSE=0

 endif

 

3.3.2 mshadow/base.h

diff --git a/mshadow/base.h b/mshadow/base.h

index c2e65d1..953b7ea 100644

--- a/mshadow/base.h

+++ b/mshadow/base.h

@@ -117,10 +117,16 @@ typedef unsigned __int64 uint64_t;

                           __cplusplus >= 201103L || defined(_MSC_VER))

 #endif

 

+#if defined(__x86_64__) || defined(__i386__)

 /*! \brief whether use SSE */

 #ifndef MSHADOW_USE_SSE

   #define MSHADOW_USE_SSE 1

 #endif

+#elif defined(__PPC64__)

+// Disable SSE on Power systems

+#define MSHADOW_USE_SSE 0

+#endif

+

 /*! \brief whether use NVML to get dynamic info */

 #ifndef MSHADOW_USE_NVML

   #define MSHADOW_USE_NVML 0

 

3.4 ps-lite

Under the ps-lite folder, there are three changes:

Makefile

make/deps.mk

make/ps.mk

3.4.1 Makefile

diff --git a/Makefile b/Makefile

index 5f4f6f7..54282eb 100644

--- a/Makefile

+++ b/Makefile

@@ -22,10 +22,10 @@ include make/ps.mk

 

 INCPATH = -I./src -I./include -I$(DEPS_PATH)/include

 

+CPU_ARCH := $(shell uname -a)

 ifneq (,$(filter $(CPU_ARCH), X86_64 X86))

 CFLAGS = -std=c++11 -msse2 -fPIC -O3 -ggdb -Wall -finline-functions $(INCPATH) $(ADD_CFLAGS)

-endif

-ifneq (,$(filter $(CPU_ARCH), powerpc64le ppc64le))

+else ifneq (,$(filter $(CPU_ARCH), powerpc64le ppc64le))

 CFLAGS = -std=c++11 -mvsx -fPIC -O3 -ggdb -Wall -finline-functions $(INCPATH) $(ADD_CFLAGS)

 else

 CFLAGS = -std=c++11 -fPIC -O3 -ggdb -Wall -finline-functions $(INCPATH) $(ADD_CFLAGS)

 

3.4.2 make/deps.mk

If the protoc command already exists on the OS, the protobuf source code will not be downloaded. This change is made because the provided protobuf source code cannot be compiled on Power.

diff --git a/make/deps.mk b/make/deps.mk

index b83a143..e3c6830 100644

--- a/make/deps.mk

+++ b/make/deps.mk

@@ -6,8 +6,7 @@ WGET = wget

 endif

 

 # protobuf

-PROTOBUF = ${DEPS_PATH}/include/google/protobuf/message.h

-${PROTOBUF}:

+${PROTOC}:

        $(eval FILE=protobuf-2.5.0.tar.gz)

        $(eval DIR=protobuf-2.5.0)

        rm -rf $(FILE) $(DIR)

 

3.4.3 make/ps.mk

In order to run mxnet in a distributed environment, the original design is to compile mxnet against some static libraries, such as libzmq and libprotobuf-lite.

diff --git a/make/ps.mk b/make/ps.mk

index 0b0f678..9bd02bb 100644

--- a/make/ps.mk

+++ b/make/ps.mk

@@ -5,9 +5,34 @@

 #

 #----------------------------------------------------------------------------------------

 

+OS_RELEASE = /etc/os-release

+OS_DIST =

+ifeq ($(OS_RELEASE), $(wildcard $(OS_RELEASE)))

+OS_RELEASE_CONTENT = $(shell cat $(OS_RELEASE))

+       ifeq (rhel, $(findstring rhel, $(OS_RELEASE_CONTENT)))

+       OS_DIST = rhel

+       endif

+

+       ifeq (ubuntu, $(findstring ubuntu,$(OS_RELEASE_CONTENT)))

+       OS_DIST = ubuntu

+       endif

+endif

+

 ifeq ($(USE_KEY32), 1)

 ADD_CFLAGS += -DUSE_KEY32=1

 endif

 

-PS_LDFLAGS_SO = -L$(DEPS_PATH)/lib -lprotobuf-lite -lzmq

-PS_LDFLAGS_A = $(addprefix $(DEPS_PATH)/lib/, libprotobuf-lite.a libzmq.a)

+PS_LDFLAGS_SO = -L$(DEPS_PATH)/lib -lzmq -lprotobuf-lite

+

+PS_LDFLAGS_A = $(addprefix $(DEPS_PATH)/lib/, libzmq.a)

+ifeq ($(USE_SYSTEM_PROTOC), 0)

+PS_LDFLAGS_A += $(addprefix $(DEPS_PATH)/lib/, libprotobuf-lite.a)

+else

+       ifeq (rhel, $(OS_DIST))

+       PS_LDFLAGS_A += /lib64/libprotobuf-lite.a

+       else ifeq (ubuntu, $(OS_DIST))

+       PS_LDFLAGS_A += /usr/lib/powerpc64le-linux-gnu/libprotobuf-lite.a

+       else

+       PS_LDFLAGS_A += -lprotobuf-lite    #use dynamic library for other platforms first

+       endif

+endif

 

4 Set header file directory path for openBLAS in environment to compile the code

export C_INCLUDE_PATH=/usr/include/openblas/:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=/usr/include/openblas/:$CPLUS_INCLUDE_PATH

Other library paths may also be needed for different configurations, such as CUDA, CUDNN, openMP, and openCV.

 

5 compiling the source code

In the master folder, run "make" to compile the source code.

6 Testing the build

python example/image-classification/train_mnist.py

7 Run all tests

tests/nightly/test_all.sh

[{"Business Unit":{"code":"BU054","label":"Systems w\/TPS"},"Product":{"code":"HW1W1","label":"Power ->PowerLinux"},"Component":"","Platform":[{"code":"PF025","label":"Platform Independent"}],"Version":"","Edition":"","Line of Business":{"code":"","label":""}}]

UID

ibm16170019