Procedure for changing the Management Node (MN) hostname and IP addresses
Note: any nodes that are already deployed must be removed first, because this procedure changes the IP address of the private interface
Original configuration:
private interface: eth0: 10.1.1.75/255.255.255.0
public interface: eth1: 10.1.2.75/255.255.255.0
hostname: hpc411vm
Final configuration:
private interface: eth0: 10.1.1.175/255.255.255.0
public interface: eth1: 10.1.2.175/255.255.255.0
hostname: hpc411
Step 1: prepare the cluster: Database cleanup and backup
1.1 clean up the xCAT event and audit logs
[root@hpc411vm ~]# tabprune -V -a auditlog > /tmp/auditlog.csv
[root@hpc411vm ~]# tabprune -V -a eventlog > /tmp/eventlog.csv
1.2 create a MN config and db backup
[root@hpc411vm ~]# mkdir /root/backup_MN
[root@hpc411vm ~]# xcatsnap -d /root/backup_MN/
Time Stamp:Tue Oct 1 10:45:19 EDT 2013
Log Directory: /root/backup_MN
Preparation Complete...
Collecting files ...
.
.
.
Retrieving xCAT database...
xCAT database retrieved.
Compiling Information...
Information compiled...
Send /root/backup_MN/xcatsnap.hpc411vm.10011045.log to IBM Support.
Send /root/backup_MN/xcatsnap.hpc411vm.10011045.tar.gz to IBM Support.
[root@hpc411vm ~]#
1.3 note the cluster-facing interface
[root@hpc411vm ~]# lsdef -t site -l | grep master
master=10.1.1.75
pcm_master_node=hpc411vm
[root@hpc411vm ~]#
1.4 stop the phpc service
[root@hpc411vm ~]# service phpc stop
Service <WEBGUI> stopped successfully.
Service <jobdt> stopped successfully.
Service <plc> stopped successfully.
Service <plc_group2> stopped successfully.
Service <purger> stopped successfully.
Service <vdatam> stopped successfully.
Stopping the LSF subsystem
Stopping Platform HPC Services: [ OK ]
[root@hpc411vm ~]#
1.5 stop xCAT
[root@hpc411vm ~]# service xcatd stop
Stopping xCATd [ OK ]
[root@hpc411vm ~]#
1.6 stop the database
[root@hpc411vm ~]# service postgresql stop
Stopping postgresql service: [ OK ]
[root@hpc411vm ~]#
Step 2: Perform the changes
2.1 Change the hostname
2.1.1 display the current hostname
[root@hpc411vm ~]# hostname
hpc411vm
2.1.2 change the hostname
[root@hpc411vm ~]# hostname hpc411
2.1.3 display the new hostname
[root@hpc411vm ~]# hostname
hpc411
[root@hpc411vm ~]#
2.1.4 Propagate the changes to the hosts file (/etc/hosts)
[root@hpc411vm ~]# diff -U1 /etc/hosts.orig /etc/hosts
--- /etc/hosts.orig 2013-10-01 10:39:19.623504093 -0400
+++ /etc/hosts 2013-10-01 10:39:47.833089400 -0400
@@ -2,3 +2,3 @@
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
-10.1.1.75 hpc411vm hpc411vm.private.dns.zone hpc411vm-eth0
-10.1.2.75 hpc411vm-eth1 hpc411vm-eth1.private.dns.zone
+10.1.1.175 hpc411 hpc411.private.dns.zone hpc411-eth0
+10.1.2.175 hpc411-eth1 hpc411-eth1.private.dns.zone
Note: make sure that every occurrence of the old hostname and the old IP addresses is updated
2.1.5 update the network configuration file to make the change permanent
[root@hpc411vm ~]# diff -U1 /etc/sysconfig/network.orig /etc/sysconfig/network
--- /etc/sysconfig/network.orig 2013-09-25 18:12:19.501411183 -0400
+++ /etc/sysconfig/network 2013-10-01 11:10:09.440275303 -0400
@@ -1,2 +1,2 @@
NETWORKING=yes
-HOSTNAME=hpc411vm
+HOSTNAME=hpc411
[root@hpc411vm ~]#
2.2 IP addresses
2.2.1 update the ifcfg files with the new IP addresses
[root@hpc411vm ~]# diff -U1 /tmp/ifcfg-eth0.orig /etc/sysconfig/network-scripts/ifcfg-eth0
--- /tmp/ifcfg-eth0.orig 2013-10-01 11:24:49.624142527 -0400
+++ /etc/sysconfig/network-scripts/ifcfg-eth0 2013-10-01 11:25:02.714806287 -0400
@@ -7,3 +7,3 @@
BOOTPROTO=none
-IPADDR=10.1.1.75
+IPADDR=10.1.1.175
NETMASK=255.255.255.0
[root@hpc411vm ~]# diff -U1 /tmp/ifcfg-eth1.orig /etc/sysconfig/network-scripts/ifcfg-eth1
--- /tmp/ifcfg-eth1.orig 2013-10-01 11:24:42.527240547 -0400
+++ /etc/sysconfig/network-scripts/ifcfg-eth1 2013-10-01 11:25:10.937851396 -0400
@@ -7,3 +7,3 @@
BOOTPROTO=none
-IPADDR=10.1.2.75
+IPADDR=10.1.2.175
NETMASK=255.255.255.0
[root@hpc411vm ~]#
2.2.2 restart networking
[root@hpc411vm ~]# service network restart
2.2.3 update the xCAT database configuration file (/etc/xcat/cfgloc)
[root@hpc411 ~]# diff -U1 /tmp/cfgloc.orig /etc/xcat/cfgloc
--- /tmp/cfgloc.orig 2013-09-25 17:46:04.986297739 -0400
+++ /etc/xcat/cfgloc 2013-10-01 11:29:05.688687123 -0400
@@ -1 +1 @@
-Pg:dbname=xcatdb;host=10.1.1.75|xcatadm|pcmdbadm
+Pg:dbname=xcatdb;host=10.1.1.175|xcatadm|pcmdbadm
[root@hpc411 ~]#
2.2.4 update the PostgreSQL Client Authentication Configuration File
[root@hpc411 ~]# diff -U0 /tmp/pg_hba.conf.orig /var/lib/pgsql/data/pg_hba.conf
--- /tmp/pg_hba.conf.orig 2013-09-25 17:45:27.809572751 -0400
+++ /var/lib/pgsql/data/pg_hba.conf 2013-10-01 11:33:12.721083770 -0400
@@ -86 +86 @@
-host all all 10.1.1.75/32 md5
+host all all 10.1.1.175/32 md5
2.2.5 start the database
[root@hpc411 ~]# service postgresql start
Starting postgresql service: [ OK ]
2.2.6 start xCAT
[root@hpc411 ~]# service xcatd start
Starting xCATd [ OK ]
[root@hpc411 ~]#
2.2.7 verify the new database setup
[root@hpc411 ~]# lsxcatd -a
Version 2.8.2 (built Fri Aug 9 03:12:35 EDT 2013)
This is a Management Node
cfgloc=Pg:dbname=xcatdb;host=10.1.1.175|xcatadm
dbengine=Pg
dbname=xcatdb
dbhost=10.1.1.175
dbadmin=xcatadm
[root@hpc411 ~]#
[root@hpc411 ~]# tabdump site
#key,value,comments,disable
"blademaxp","64",,
"fsptimeout","0",,
"installdir","/install",,
"ipmimaxp","64",,
"ipmiretries","3",,
"ipmitimeout","2",,
"consoleondemand","no",,
"maxssh","8",,
"ppcmaxp","64",,
"ppcretry","3",,
"ppctimeout","0",,
"powerinterval","0",,
"syspowerinterval","0",,
"sharedtftp","1",,
"SNsyncfiledir","/var/xcat/syncfiles",,
"nodesyncfiledir","/var/xcat/node/syncfiles",,
"tftpdir","/tftpboot",,
"xcatdport","3001",,
"xcatiport","3002",,
"xcatconfdir","/etc/xcat",,
"useNmapfromMN","no",,
"enableASMI","no",,
"db2installloc","/mntdb2",,
"databaseloc","/var/lib",,
"sshbetweennodes","ALLGROUPS",,
"dnshandler","ddns",,
"vsftp","n",,
"cleanupxcatpost","no",,
"dhcplease","43200",,
"domain","private.dns.zone",,
"master","10.1.1.75",,
"nameservers","10.1.1.75",,
"dhcpinterfaces","eth0",,
"pcm_master_node","hpc411vm",,
"pcm_ntp_servers","pool.ntp.org",,
"pcm_export_home","True",,
"forwarders","10.1.2.1",,
"timezone","America/New_York",,
"pcm_nat_forwarding","eth1|True",,
"runbootscripts","yes",,
"auditskipcmds","clienttype:pcmpoll",,
"phpc_cluster_admin","phpcadmin",,
"phpc_mode","1",,
[root@hpc411 ~]#
2.2.8 update the xCAT database
2.2.8.1 change the site table attributes
[root@hpc411 ~]# chdef -t site master=10.1.1.175
1 object definitions have been created or modified.
[root@hpc411 ~]# chdef -t site pcm_master_node=hpc411
1 object definitions have been created or modified.
[root@hpc411 ~]# chdef -t site nameservers=10.1.1.175
1 object definitions have been created or modified.
2.2.8.1 change the MN name in the xCAT database
Prior to the change, the DNS, DHCP and conserver files need to be updated (the old management node's entry needs to be removed)
2.2.8.1.1 DNS
[root@hpc411 ~]# makedns -d hpc411vm
Handling hpc411vm in /etc/hosts.
Handling hpc411vm-eth1 in /etc/hosts.
Getting reverse zones, this may take several minutes for a large cluster.
Completed getting reverse zones.
Updating zones.
Completed updating zones.
Updating DNS records, this may take several minutes for a large cluster.
Error: Unable to find an IP for hpc411vm-eth1 in hosts table or via system lookup (i.e. /etc/hosts)
Error: No reply received when sending DNS update to zone .private.dns.zone.
Error: No reply received when sending DNS update to zone 1.1.10.IN-ADDR.ARPA.
Completed updating DNS records.
DNS setup is completed
Note: these error messages can be ignored; the hostname and IP addresses have already been changed
2.2.8.1.2 DHCP
[root@hpc411 ~]# makedhcp -d hpc411vm
Warning: Unable to find mac address for hpc411vm
[root@hpc411 ~]#
2.2.8.1.3 conserver
[root@hpc411 ~]# makeconservercf -d hpc411vm
[root@hpc411 ~]#
2.2.8.1.4 change the MN name in the xCAT database
[root@hpc411 ~]# chdef -t node -o hpc411vm -n hpc411
Changed the object name from hpc411vm to hpc411.
[root@hpc411 ~]#
2.2.8.1.5 change the IP addresses for the MN in the xCAT database
[root@hpc411 ~]# chdef -t node -o hpc411 nicips='eth0!10.1.1.175,eth1!10.1.2.175' ip=10.1.1.175
1 object definitions have been created or modified.
[root@hpc411 ~]#
Also update the "networks" table, using tabedit
Before modification:
[root@hpc411 ~]# tabdump networks
#netname,net,mask,mgtifname,gateway,dhcpserver,tftpserver,nameservers,ntpservers,logservers,dynamicrange,staticrange,staticrangeincrement,nodehostname,ddnsdomain,vlanid,domain,comments,disable
"provision","10.1.1.0","255.255.255.0","eth0","<xcatmaster>",,"10.1.1.75",,,,"10.1.1.201-10.1.1.254","10.1.1.3-10.1.1.200","1",,,,"private.dns.zone",,
"public","10.1.2.0","255.255.255.0","eth1","<xcatmaster>",,"10.1.2.75",,,,,"10.1.2.75-10.1.2.254","1",,,,,,
[root@hpc411 ~]# tabedit networks
After modification:
[root@hpc411 ~]# tabdump networks
#netname,net,mask,mgtifname,gateway,dhcpserver,tftpserver,nameservers,ntpservers,logservers,dynamicrange,staticrange,staticrangeincrement,nodehostname,ddnsdomain,vlanid,domain,comments,disable
"provision","10.1.1.0","255.255.255.0","eth0","<xcatmaster>",,"10.1.1.175",,,,"10.1.1.201-10.1.1.254","10.1.1.3-10.1.1.200","1",,,,"private.dns.zone",,
"public","10.1.2.0","255.255.255.0","eth1","<xcatmaster>",,"10.1.2.175",,,,,"10.1.2.75-10.1.2.254","1",,,,,,
[root@hpc411 ~]#
2.2.8.1.6 add the MN to the DNS, DHCP and conserver configuration files
[root@hpc411 ~]# makedns -n
Handling hpc411 in /etc/hosts.
Handling localhost in /etc/hosts.
Handling localhost in /etc/hosts.
Handling hpc411-eth1 in /etc/hosts.
Getting reverse zones, this may take several minutes for a large cluster.
Completed getting reverse zones.
Updating zones.
Completed updating zones.
Restarting named
Restarting named complete
Updating DNS records, this may take several minutes for a large cluster.
Completed updating DNS records.
DNS setup is completed
[root@hpc411 ~]# makedhcp -n
Renamed existing dhcp configuration file to /etc/dhcp/dhcpd.conf.xcatbak
[root@hpc411 ~]# makeconservercf
[root@hpc411 ~]#
2.2.8.1.7 update the perf, LSF, EGO and GUI configuration files with the new name
Not modified: /install/shared/ibm/platform_lsf/9.1/lsf_quick_admin.html
/install/shared/ibm/platform_lsf/conf/lsf.conf
/install/shared/ibm/platform_lsf/conf/lsf.cluster.phpc_cluster
/install/shared/ibm/platform_lsf/conf/ego/phpc_cluster/kernel/ego.conf
/install/shared/ibm/platform_lsf/work/phpc_cluster/ego/vemkd/client/status
/opt/pcm/web-portal/gui/conf/wsm_webgui.conf
/opt/pcm/web-portal/perf/conf/wsm/wsm_purger.conf
/opt/pcm/web-portal/perf/conf/wsm/wsm_plc.conf
/opt/pcm/web-portal/perf/conf/wsm/wsm_vdatam.conf
/opt/pcm/web-portal/perf/conf/wsm/wsm_jobdt.conf
/opt/pcm/web-portal/perf/conf/wsm/wsm_plc_group2.conf
2.2.8.1.8 update the config files with the new IP address
/opt/pcm/web-portal/perf/conf/datasource.xml
2.2.8.1.9 Rebuild the genesis image (for autodiscovery)
[root@hpc411 ~]# mknb x86_64
Creating genesis.fs.x86_64.lzma in /tftpboot/xcat
[root@hpc411 ~]#