The migration is done with a Python script. The source IBM Cloud Object Storage (COS) instance is the instance you are migrating from, and the target (destination) COS instance is the instance you are migrating to. The script uses the ibm-cos-sdk and ibm-platform-services Python SDKs. An example of the architecture is shown below:
The following environment variables are used by the script:
You can create and download your IBM Cloud API key in the IBM Cloud console at Manage > Access (IAM) > API keys.
You can find the GUIDs for the source and target instances in the cloud console resource list. Type the name of each COS instance and click the blank area of the instance's row to retrieve its GUID.
To find your US COS endpoint, click your source COS instance in the Resource List in the navigation menu. Then click Endpoints and, in the location dropdown, select the region your buckets are in (us-geo, for example). Make sure to prepend https:// to the endpoint when setting the environment variable (for example, https://s3.us.cloud-object-storage.appdomain.cloud).
Leave the values for IAM_POLICY_MANAGEMENT_URL, IAM_POLICY_MANAGEMENT_AUTHTYPE, and DISABLE_RULES as they are.
The IAM_ACCOUNT_ID is the ID of the IBM Cloud account that contains both COS instances; the script uses it to build the bucket CRNs and the authorization policy. You can find it in the cloud console under Manage > Account > Account settings.
The suffix is appended to the name of each newly created bucket, since bucket names must be globally unique.
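Before running the script, it can help to confirm that every variable is actually set. Here is a minimal, optional pre-flight check, written against the variable names the script below expects:
import os

# optional pre-flight check for the environment variables used by the script;
# the script itself also requires the ibm-cos-sdk and ibm-platform-services packages
REQUIRED_VARS = [
    "IBMCLOUD_API_KEY", "SERVICE_INSTANCE_ID", "DEST_SERVICE_INSTANCE_ID",
    "US_GEO", "IAM_ACCOUNT_ID", "SUFFIX",
    "IAM_POLICY_MANAGEMENT_URL", "IAM_POLICY_MANAGEMENT_AUTHTYPE", "DISABLE_RULES",
]
missing = [name for name in REQUIRED_VARS if not os.environ.get(name)]
if missing:
    raise SystemExit("Missing environment variables: " + ", ".join(missing))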
After the environment variables have been set, you can run the script. The full script is shown below.
import os
import ibm_boto3
from ibm_botocore.client import Config
from ibm_platform_services import IamPolicyManagementV1

# this is the suffix used for the new naming convention of buckets
suffix = "-" + os.environ['SUFFIX']
iamAccountID = os.environ.get('IAM_ACCOUNT_ID')
# function to get the region of a bucket from its location constraint
def getBucketRegion(locationConstraint):
    if locationConstraint in ("us-smart", "us-standard", "us-vault", "us-cold"):
        return "us-geo"
    if locationConstraint in ("us-east-smart", "us-east-standard", "us-east-vault", "us-east-cold"):
        return "us-east"
    if locationConstraint in ("us-south-smart", "us-south-standard", "us-south-vault", "us-south-cold"):
        return "us-south"
    return ""

# function to get the region of the URL endpoint
def getUrlRegion():
    endpoint_url = os.environ['US_GEO']
    # map each endpoint location segment to its region; the s3., s3.private.
    # and s3.direct. variants of a location all map to the same region
    regions = {
        "us": "us-geo",
        "dal.us": "dallas",
        "wdc.us": "washington",
        "sjc.us": "san jose",
        "us-east": "us-east",
        "us-south": "us-south",
    }
    for location, region in regions.items():
        for access in ("s3", "s3.private", "s3.direct"):
            if endpoint_url == "https://" + access + "." + location + ".cloud-object-storage.appdomain.cloud":
                return region
    return ""
# function to list buckets in the configured endpoint's region; source bucket
# names get the suffix appended, since those are the names the buckets will
# have on the target instance
def get_buckets1(instanceType, cos):
    bucketNames = []
    try:
        buckets = cos.list_buckets()["Buckets"]
    except Exception as e:
        print("Error: Unable to get COS Buckets.", e)
        return bucketNames
    for bucket in buckets:
        try:
            request = cos.get_bucket_location(Bucket=bucket["Name"])
            bucketLocation = request["LocationConstraint"]
        except Exception:
            # this accounts for when the bucket is not in the targeted region
            bucketLocation = ""
        if getUrlRegion() == getBucketRegion(bucketLocation):
            if instanceType == "target":
                bucketNames.append(bucket["Name"])
            else:
                bucketNames.append(bucket["Name"] + suffix)
    return bucketNames
# function to create buckets on the target instance
def create_buckets(targetBucketNames):
    # Destination COS client connection
    destCos = ibm_boto3.client("s3",
        ibm_api_key_id=os.environ.get('IBMCLOUD_API_KEY'),
        ibm_service_instance_id=os.environ['DEST_SERVICE_INSTANCE_ID'],
        config=Config(signature_version="oauth"),
        endpoint_url=os.environ['US_GEO']
    )
    # the cross-region US location constraint is "us-smart", not "us-geo-smart"
    region = getUrlRegion()
    location = "us-smart" if region == "us-geo" else region + "-smart"
    for bucketName in targetBucketNames:
        try:
            destCos.create_bucket(Bucket=bucketName, CreateBucketConfiguration={
                'LocationConstraint': location
            })
            print("Created bucket:", bucketName)
        except Exception as e:
            print("ERROR: Unable to create bucket.", bucketName, e)
def migrateBuckets():
# Create client connection
cos = ibm_boto3.client("s3",
ibm_api_key_id=os.environ.get('IBMCLOUD_API_KEY'),
ibm_service_instance_id=os.environ['SERVICE_INSTANCE_ID'],
config=Config(signature_version="oauth"),
endpoint_url=os.environ['US_GEO']
)
    # Getting all source buckets
    sourceBucketNames = get_buckets1("source", cos)
    print("All buckets from source instance in the " + getUrlRegion() + " region:", sourceBucketNames)
# Destination cos client connection
destCos = ibm_boto3.client("s3",
ibm_api_key_id=os.environ.get('IBMCLOUD_API_KEY'),
ibm_service_instance_id=os.environ['DEST_SERVICE_INSTANCE_ID'],
config=Config(signature_version="oauth"),
endpoint_url=os.environ['US_GEO']
)
    # Getting all target buckets to avoid duplicates
    targetBucketNames = get_buckets1("target", destCos)
    print("All buckets from target instance in the " + getUrlRegion() + " region:", targetBucketNames)
    # excluding buckets that already exist on the target instance
    targetBucketNames = [x for x in sourceBucketNames if x not in targetBucketNames]
    print("Buckets to create on the target instance:", targetBucketNames)
# creating buckets on target cos instance
create_buckets(targetBucketNames)
# function to list buckets in the configured endpoint's region (no suffix)
def get_buckets2(instanceType, cos):
    bucketNames = []
    try:
        buckets = cos.list_buckets()["Buckets"]
    except Exception as e:
        print("Error: Unable to get COS Buckets.", e)
        return bucketNames
    for bucket in buckets:
        try:
            request = cos.get_bucket_location(Bucket=bucket["Name"])
            bucketLocation = request["LocationConstraint"]
        except Exception:
            # this accounts for when the bucket is not in the targeted region
            bucketLocation = ""
        if getUrlRegion() == getBucketRegion(bucketLocation):
            bucketNames.append(bucket["Name"])
    return bucketNames
# function to add replication rules to buckets
def addReplicationRules(buckets, targetID, cos):
    status = 'Enabled'
    if os.environ['DISABLE_RULES'] == "true":
        status = 'Disabled'
    # this is the suffix used for the new naming convention of buckets
    suffix = "-" + os.environ['SUFFIX']
    for bucket in buckets:
        try:
            cos.put_bucket_replication(Bucket=bucket, ReplicationConfiguration={
                'Rules': [
                    {
                        'Priority': 0,
                        'Status': status,
                        # an empty filter applies the rule to every object in the bucket
                        'Filter': {},
                        # the destination bucket is addressed by its CRN on the target instance
                        'Destination': {
                            'Bucket': 'crn:v1:bluemix:public:cloud-object-storage:global:a/' + iamAccountID + ':' + targetID + ':bucket:' + bucket + suffix,
                        },
                        'DeleteMarkerReplication': {
                            'Status': 'Enabled'
                        }
                    },
                ]
            })
            if os.environ['DISABLE_RULES'] != "true":
                print("added replication rule to bucket", bucket)
            else:
                print("disabled replication rule for bucket", bucket)
        except Exception as e:
            print("Error: Unable to add replication rule to bucket", bucket, e)
# function to enable versioning on buckets (replication requires versioning
# on both the source and the target bucket)
def enableVersioning(buckets, cos):
    for bucket in buckets:
        try:
            cos.put_bucket_versioning(
                Bucket=bucket,
                VersioningConfiguration={
                    'Status': 'Enabled'
                }
            )
            print("versioning enabled for bucket", bucket)
        except Exception as e:
            print("Error: Unable to enable versioning for bucket", bucket, e)
# function to create an IAM authorization policy that allows the source COS
# instance to write data to the target instance
def addAuthorization(sourceID, targetID):
    try:
        # Create IAM Policy Management client
        service_client = IamPolicyManagementV1.new_instance()
        service_client.create_policy(
            type="authorization",
            subjects=[{"attributes": [
                {"name": "accountId", "value": iamAccountID},
                {"name": "serviceName", "value": "cloud-object-storage"},
                {"name": "serviceInstance", "value": sourceID}]}],
            roles=[{"role_id": "crn:v1:bluemix:public:iam::::serviceRole:Writer"}],
            resources=[{"attributes": [
                {"name": "accountId", "value": iamAccountID},
                {"name": "serviceName", "value": "cloud-object-storage"},
                {"name": "serviceInstance", "value": targetID}]}])
        print("created authorization policy")
    except Exception as e:
        print("Warning: Unable to create policy. Ignore this if the policy already exists.", e)
def addReplicationRulesToMigratedBuckets():
# Create client connection
cos = ibm_boto3.client("s3",
ibm_api_key_id=os.environ.get('IBMCLOUD_API_KEY'),
ibm_service_instance_id=os.environ['SERVICE_INSTANCE_ID'],
config=Config(signature_version="oauth"),
endpoint_url=os.environ['US_GEO']
)
sourceCosInstanceID=os.environ['SERVICE_INSTANCE_ID']
# Getting all source buckets
sourceBucketNames=get_buckets2("source",cos)
    # enable versioning on the buckets of both COS instances (required for replication)
    print("enabling versioning for source instance buckets")
enableVersioning(sourceBucketNames,cos)
# Destination cos client connection
destCos = ibm_boto3.client("s3",
ibm_api_key_id=os.environ.get('IBMCLOUD_API_KEY'),
ibm_service_instance_id=os.environ['DEST_SERVICE_INSTANCE_ID'],
config=Config(signature_version="oauth"),
endpoint_url=os.environ['US_GEO']
)
targetCosInstanceId=os.environ['DEST_SERVICE_INSTANCE_ID']
targetBucketNames = get_buckets2("target",destCos)
print("enable versioning for target instances")
enableVersioning(targetBucketNames,destCos)
#add authorization from source cos instance to target cos instance
addAuthorization(sourceCosInstanceID,targetCosInstanceId)
#add replication rules to buckets
addReplicationRules(sourceBucketNames,targetCosInstanceId,cos)
def copy_in_place(bucket):
# Create client connection
cos = ibm_boto3.client("s3",
ibm_api_key_id=os.environ.get('IBMCLOUD_API_KEY'),
ibm_service_instance_id=os.environ['SERVICE_INSTANCE_ID'],
config=Config(signature_version="oauth"),
endpoint_url=os.environ['US_GEO']
)
cosObjects=cos.list_objects(Bucket=bucket)
if "Contents" not in cosObjects:
print("source bucket is empty")
return
print("Priming existing objects in " + bucket + " for replication...")
paginator = cos.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket=bucket)
for page in pages:
for obj in page['Contents']:
key = obj['Key']
print(" * Copying " + key + " in place...")
try:
headers = cos.head_object(
Bucket=bucket,
Key=key
)
md = headers["Metadata"]
cos.copy_object(
CopySource={
'Bucket': bucket,
'Key': key
},
Bucket=bucket,
Key=key,
TaggingDirective='COPY',
MetadataDirective='REPLACE',
Metadata=md
)
print(" Success!")
except Exception as e:
print(" Unable to copy object: {0}".format(e))
print("Existing objects in " + bucket + " are now subject to replication rules.")
def replicateExistingFiles():
# Create client connection
cos = ibm_boto3.client("s3",
ibm_api_key_id=os.environ.get('IBMCLOUD_API_KEY'),
ibm_service_instance_id=os.environ['SERVICE_INSTANCE_ID'],
config=Config(signature_version="oauth"),
endpoint_url=os.environ['US_GEO']
)
# Getting all source buckets
    sourceBucketNames = get_buckets2("source", cos)
print("All source buckets to replicate",sourceBucketNames)
# Copy data from source to target bucket
for bucket in sourceBucketNames:
copy_in_place(bucket)
# main: create the buckets on the target instance, configure replication, then
# prime existing objects so the new rules pick them up
migrateBuckets()
addReplicationRulesToMigratedBuckets()
if os.environ['DISABLE_RULES'] != "true":
    replicateExistingFiles()
This script was designed to help users migrate one COS instance to another on the same account within a US region. The calls in the main section run in order: migrateBuckets creates the buckets on the target instance, addReplicationRulesToMigratedBuckets enables versioning on both sides, creates the authorization policy, and adds the replication rules, and replicateExistingFiles copies existing objects in place so the rules apply to them.
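If you want to confirm the result, here is a small optional sketch (not part of the script above) that prints the replication rule status of each bucket on the source instance, reusing the same environment variables:
import os
import ibm_boto3
from ibm_botocore.client import Config

# sketch: list each source bucket's replication rule status to verify the
# configuration the migration script created
cos = ibm_boto3.client("s3",
    ibm_api_key_id=os.environ.get('IBMCLOUD_API_KEY'),
    ibm_service_instance_id=os.environ['SERVICE_INSTANCE_ID'],
    config=Config(signature_version="oauth"),
    endpoint_url=os.environ['US_GEO']
)
for bucket in cos.list_buckets()["Buckets"]:
    try:
        reply = cos.get_bucket_replication(Bucket=bucket["Name"])
        print(bucket["Name"], reply["ReplicationConfiguration"]["Rules"][0]["Status"])
    except Exception:
        pass  # buckets without a replication configuration raise an error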
If you later want to disable the replication rules on the buckets, set DISABLE_RULES to true and run the script again.
By following these steps, you can migrate buckets from one US IBM Cloud Object Storage (COS) instance to another, one region at a time.
If you have any questions, you can reach out to me on LinkedIn.