Friday, September 29, 2017

Object Pools

                         Object Pools


This blog assumes you have an Object Pools beta cluster up and running, with packages installed from the repo below.

cat /etc/yum.repos.d/maprtech.repo 
[maprtech]
name=MapR Technologies
baseurl=http://username:password@stage.mapr.com/beta/objectpools/redhat/
enabled=1
gpgcheck=0
protect=1

[centos]
name=centos, Inc.
baseurl=http://mirror.centos.org/centos/6.9/os/x86_64/
enabled=1
gpgcheck=0

This blog mainly shows how to set up a tiering policy so that files are offloaded to AWS (cold storage) depending on their size (the policy can also be associated with the age of files, last-modified time, etc.).


1) Create a credential.txt file, in JSON format, with the details below. It is used to connect to the tier (AWS) and offload data according to the policy that is set.

[root@node113rhel67 ~]# cat /tmp/credential.txt
{
  "bucketName" : "abizerobjectpooltests",
  "region": "aws_us_east",
  "credentials" : {
    "accessKey" : "XXXXX",
    "secretKey" : "XXXX+XXX"
  }
}
2) Create a tier by running the following command .

[root@node113rhel67 ~]# maprcli tier create -name aws_us_east1 -type cold -url s3-external-1.amazonaws.com -credential /tmp/credential.txt
[root@node113rhel67 ~]# maprcli tier list
maxobjsize  tiername     tiertype  tierid   throttling  credential           url                          
8388608     aws_us_east1  cold      208859171  0           /tmp/credential.txt  s3-external-1.amazonaws.com
[root@node113rhel67 ~]#

3) Create a tier rule that specifies which files need to be offloaded.

[root@node113rhel67 ~]# maprcli tier rule create -name size1 -expr "s:1m"
[root@node113rhel67 ~]# maprcli tier rule list
expression  rulename  ruleid  
s:1m        size1     1       
[root@node113rhel67 ~]# 

Note :- 

When a tier is created, MFS creates a volume named maprtier_<tiername>, mounted in /, for the tier. This volume stores all the metadata information for the tier and information on all jobs running on the tier.

[root@node113rhel67 logs]# hadoop mfs -ls / | grep maprtier_aws_us_east1
vrwxr-xr-x  Z U U   3 root root          1 2017-09-28 17:37  268435456 /maprtier_aws_us_east1
       p maprtier_aws_us_east1 default 2049.46.131252 -> 2176.16.2  node114rhel67:5660 node113rhel67:5660 node115rhel67:5660 
[root@node113rhel67 logs]# 

4) Create a volume with tiering enabled and check its properties by running the volume info commands.

[root@node113rhel67 ~]# maprcli volume create -name objectpool -path /objectpool -tieringenable true -tiername aws_us_east1 -tieringrule size1
[root@node113rhel67 logs]# maprcli volume info -name objectpool -json 
{
"timestamp":1506646122308,
"timeofday":"2017-09-28 05:48:42.308 GMT-0700",
"status":"OK",
"total":1,
"data":[
{
"acl":{
"Principal":"User root",
"Allowed actions":[
"dump",
"restore",
"m",
"a",
"d",
"fc"
]
},
"creator":"root",
"aename":"root",
"aetype":0,
"numreplicas":"3",
"minreplicas":"2",
"nsNumReplicas":"3",
"nsMinReplicas":"2",
"allowGrant":"false",
"reReplTimeOutSec":"0",
"replicationtype":"high_throughput",
"rackpath":"/data",
"mirrorthrottle":"1",
"accesstime":"September 28, 2017",
"readonly":"0",
"mountdir":"/objectpool",
"volumename":"objectpool",
"mounted":1,
"quota":"0",
"advisoryquota":"0",
"snapshotcount":"0",
"logicalUsed":"9",
"used":"2",
"snapshotused":"0",
"totalused":"2",
"scheduleid":0,
"schedulename":"",
"mirrorscheduleid":0,
"volumetype":0,
"mirrortype":3,
"creatorcontainerid":2177,
"creatorvolumeuuid":"-7065441664652339613:5265903001908611870",
"volumeid":151612901,
"actualreplication":[
0,
0,
0,
100,
0,
0,
0,
0,
0,
0,
0
],
"nameContainerSizeMB":0,
"nameContainerId":2177,
"needsGfsck":false,
"maxinodesalarmthreshold":"0",
"dbrepllagsecalarmthresh":"0",
"limitspread":"true",
"partlyOutOfTopology":0,
"auditVolume":0,
"audited":0,
"coalesceInterval":60,
"enableddataauditoperations":"getattr,setattr,chown,chperm,chgrp,getxattr,listxattr,setxattr,removexattr,read,write,create,delete,mkdir,readdir,rmdir,createsym,lookup,rename,createdev,truncate,tablecfcreate,tablecfdelete,tablecfmodify,tablecfScan,tableget,tableput,tablescan,tablecreate,tableinfo,tablemodify,getperm,getpathforfid,hardlink",
"disableddataauditoperations":"",
"volumeAces":{
"readAce":"p",
"writeAce":"p"
},
"fixCreatorId":"false",
"ReplTypeConversionInProgress":"0",
"tier":{
"enable":"true",
"tierId":"208859171",
"ruleId":"1",
"encryption":"true",
"recallExpiryTime":"1"
}
}
]
}
[root@node113rhel67 logs]# maprcli dump volumeinfo -volumename objectpool -json | grep -w ContainerId
"ContainerId":2177,
"ContainerId":2178,
"ContainerId":2179,
"ContainerId":2180,
"ContainerId":2181,
"ContainerId":2182,
[root@node113rhel67 logs]# 

5) Copy a dummy file larger than 1 MB into the volume with tiering enabled.

hadoop fs -put cldb.log /objectpool/

6) Manually offload the volume. Once the command succeeds, you can use the "volume tierstatus" command to check the status of the offload. Bingo! My test file was offloaded.


[root@node113rhel67 logs]# maprcli volume offload -name objectpool
[root@node113rhel67 logs]# maprcli volume tierstatus -name objectpool -json
{
"timestamp":1506646048027,
"timeofday":"2017-09-28 05:47:28.027 GMT-0700",
"status":"OK",
"total":1,
"data":[
{
"offload":{
"state":"Success",
"startTime":"2017-09-28 05:37:31.183 GMT-0700",
"endTime":"2017-09-28 05:37:59.722 GMT-0700",
"gateway":"10.10.70.113:8660"
}
}
]
}
[root@node113rhel67 logs]#





DEBUG Logging :

To enable debug logging for MASTGateway, use the command below.

maprcli trace setlevel -module MASTGateway -level Debug -port 8660


Log interpretation/Troubleshooting :

1) When the offload command is run, the gateway (G/W) first gets the tier ID.

2017-09-28 17:37:19,3213 DEBUG MASTGateway fs/mastgateway/src/cc/common/mastgateway.cc:1186 Thread: 3518 TierEncryption from cmd : 1, key : 1
2017-09-28 17:37:19,3213 DEBUG MASTGateway fs/mastgateway/src/cc/common/tierreadhandle.cc:277 Thread: 3518 Tier 208859171 config changed

2) Now the job is internally kicked to start the offload to s3-external-1.amazonaws.com for the volume in question.

2017-09-28 17:37:19,3242 INFO MASTGateway fs/mastgateway/src/cc/common/offloader.cc:653 Thread: 3518 TierOffload: job started with URL s3-external-1.amazonaws.com for volId: 151612901
2017-09-28 17:37:19,3938 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:978 Thread: 3518 TierOffload: Enter VolumeOffloadGetContainers, volId: 151612901

3) Next, information is gathered for all the containers that are part of the volume.

2017-09-28 17:37:19,4025 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:1040 Thread: 3518 TierOffload: VolumeOffloadGetContainers: written 6 containers for volId 151612901
2017-09-28 17:37:19,4059 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:1049 Thread: 3518 TierOffload: Exit VolumeOffloadGetContainers, volId: 151612901, numCntrs: 6, err: 0

4) Now we will try to get all the snapshots for the volume in question from Name Container .

2017-09-28 17:37:19,4109 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:870 Thread: 3518 TierOffload: GetSnapshotList returned 0 snapshots, volId: 151612901, cid: 2177
2017-09-28 17:37:19,4133 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:895 Thread: 3518 TierOffload: Exit VolumeOffloadGetSnapToRootCidMap, volId: 151612901, err: 0

5) It then checks whether there are any snapcids and gets the inode details for every container and snapshot.


2017-09-28 17:37:19,6461 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:1499 Thread: 3528 TierOffload: Exit VolumeOffloadGetSnapshots, voId: 151612901, cid: 2177, err: 0

2017-09-28 17:37:19,6474 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:1520 Thread: 3528 TierOffload: Enter VolumeOffloadGetInodes, volId: 151612901, cid: 2177

6) i) Once all the details are gathered, the offload thread starts offloading the FIDs by converting the FID data into objects and then doing a PUT once the bucket on AWS is created.


2017-09-28 17:37:33,0445 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:2254 Thread: 3530 Exit VolumeOffloadUploadData, offloadFid: 2180.32.131286, startOff: 9371648, numVcds: 2, err: 0
2017-09-28 17:37:33,0445 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:2203 Thread: 3530 TierOffload: Exit VolumeOffloadReadNUploadData, offloadFid: 2180.32.131286, offset: 9502720, len: 0, err: 0
2017-09-28 17:37:33,0445 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:2085 Thread: 3530 TierOffload: Exit VolumeOffloadCheckVcdsNUploadData, offloadFid: 2180.32.131286, offset: 9240576, len: 262144, err: 0
2017-09-28 17:37:33,0445 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:1976 Thread: 3530 TierOffload: Exit VolumeOffloadGetOwnedBlocksOneFid, offloadFid: 2180.32.131286, err: 0
2017-09-28 17:37:33,0478 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:1879 Thread: 3530 TierOffload: Exit VolumeOffloadGetOwnedBlocks, volId: 151612901, rwcid: 2180, err: 0
2017-09-28 17:37:33,0478 DEBUG MASTGateway fs/mastgateway/src/cc/common/tierwritehandle.cc:1158 Thread: 3530 hid 21, objId 9096de5.884.1,  packing object.
2017-09-28 17:37:33,0479 DEBUG MASTGateway fs/mastgateway/src/cc/common/tierwritehandle.cc:1168 Thread: 3530 hid 21, objId 9096de5.884.1, Packing complete
2017-09-28 17:37:33,0508 DEBUG MASTGateway fs/mastgateway/src/cc/common/tierwritehandle.cc:1190 Thread: 3530 hid 21, objId 9096de5.884.1, objmap write complete
2017-09-28 17:37:33,0508 DEBUG MASTGateway fs/mastgateway/src/cc/common/tierwritehandle.cc:1641 Thread: 3530 key for 1 is |0.9096de5.884.1|
2017-09-28 17:37:33,0581 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:142 Thread: 3530 CurlRequest Timeout: 60000 msec
2017-09-28 17:37:33,0894 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3528 CURLINFO_TEXT connected
2017-09-28 17:37:33,0895 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3528 CURLINFO_TEXT Connected to s3-external-1.amazonaws.com (52.216.
2017-09-28 17:37:33,0928 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3530 CURLINFO_TEXT About to connect() to s3-external-1.amazonaws.com
2017-09-28 17:37:33,0928 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3530 CURLINFO_TEXT   Trying 54.231.97.220...
2017-09-28 17:37:33,1717 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3530 CURLINFO_TEXT connected
2017-09-28 17:37:33,1717 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3530 CURLINFO_TEXT Connected to s3-external-1.amazonaws.com (54.231.
2017-09-28 17:37:33,1732 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3528 CURLINFO_TEXT skipping SSL peer certificate verification
2017-09-28 17:37:33,2535 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3530 CURLINFO_TEXT skipping SSL peer certificate verification
2017-09-28 17:37:33,2577 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3528 CURLINFO_TEXT SSL connection using TLS_ECDHE_RSA_WITH_AES_128_C
2017-09-28 17:37:33,2577 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3528 CURLINFO_TEXT Server certificate:
2017-09-28 17:37:33,2577 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3528 CURLINFO_TEXT         subject: CN=*.s3-external-1.amazonaws.com,O=Amaz
2017-09-28 17:37:33,2577 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3528 CURLINFO_TEXT         start date: Jul 18 00:00:00 2016 GMT
2017-09-28 17:37:33,2577 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3528 CURLINFO_TEXT         expire date: Oct 26 12:00:00 2017 GMT
2017-09-28 17:37:33,2577 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3528 CURLINFO_TEXT         common name: *.s3-external-1.amazonaws.com
2017-09-28 17:37:33,2577 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3528 CURLINFO_TEXT         start date: Jul 18 00:00:00 2016 GMT
2017-09-28 17:37:33,2577 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3528 CURLINFO_TEXT         expire date: Oct 26 12:00:00 2017 GMT
2017-09-28 17:37:33,2577 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3528 CURLINFO_TEXT         common name: *.s3-external-1.amazonaws.com
2017-09-28 17:37:33,2577 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:47 Thread: 3528 CURLINFO_TEXT         issuer: CN=DigiCert Baltimore CA-2 G2,OU=w
ww.dig
2017-09-28 17:37:33,2578 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:51 Thread: 3528 CURLINFO_HEADER_OUT PUT /abizerobjectpooltests HTTP/1.1^M

ii) Once the abizerobjectpooltests bucket is created, the objects are put/stored in the bucket.


[root@node113rhel67 logs]# grep abizerobjectpooltests object_log 
2017-09-28 17:37:33,2578 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:51 Thread: 3528 CURLINFO_HEADER_OUT PUT /abizerobjectpooltests HTTP/1.1
2017-09-28 17:37:33,3369 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:51 Thread: 3530 CURLINFO_HEADER_OUT PUT /abizerobjectpooltests HTTP/1.1
2017-09-28 17:37:33,6252 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:55 Thread: 3528 CURLINFO_HEADER_IN Location: /abizerobjectpooltests
2017-09-28 17:37:33,8568 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:51 Thread: 3528 CURLINFO_HEADER_OUT PUT /abizerobjectpooltests/0.9096de5.881.1 
2017-09-28 17:37:33,8909 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:55 Thread: 3530 CURLINFO_HEADER_IN Location: /abizerobjectpooltests
2017-09-28 17:37:34,1754 DEBUG MASTGateway fs/mastgateway/src/cc/s3/curlclient.cc:51 Thread: 3530 CURLINFO_HEADER_OUT PUT /abizerobjectpooltests/0.9096de5.884.1 


7) Finally, we will see messages like the ones below, which confirm that the offload job completed.


2017-09-28 17:37:19,6995 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:590 Thread: 3529 TierOffload: job done for cid: 2178 with status 0
2017-09-28 17:37:19,7039 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:590 Thread: 3526 TierOffload: job done for cid: 2179 with status 0
2017-09-28 17:37:19,7126 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:590 Thread: 3533 TierOffload: job done for cid: 2181 with status 0
2017-09-28 17:37:19,7135 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:590 Thread: 3534 TierOffload: job done for cid: 2182 with status 0
2017-09-28 17:37:44,9441 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:590 Thread: 3528 TierOffload: job done for cid: 2177 with status 0
2017-09-28 17:37:47,9266 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:590 Thread: 3530 TierOffload: job done for cid: 2180 with status 0

8) After the offload, the scratch/temp path is removed and CLDB is updated about the job being completed.


2017-09-28 17:37:47,9266 DEBUG MASTGateway fs/mastgateway/src/cc/common/offloader.cc:505 Thread: 3530 TierOffload: Removing scratchpath: /maprtier_aws_us_east1/151612901/TierOffload
2017-09-28 17:37:47,9266 INFO MASTGateway fs/mastgateway/src/cc/common/offloader.cc:514 Thread: 3530 TierOffload: job done for volId: 151612901 with status 0, updating CLDB..


Debugging :


1) Install s3cmd: a command-line client for copying files to/from Amazon S3 (Simple Storage Service) and performing other related tasks, for instance creating and removing buckets, listing objects, etc.


 yum install s3cmd

2) Configure s3cmd to connect to S3 using my access/secret key for a specific region.

[root@node113rhel67 ~]# s3cmd --configure

Enter new values or accept defaults in brackets with Enter.
Refer to user manual for detailed description of all options.

Access key and Secret key are your identifiers for Amazon S3. Leave them empty for using the env variables.
Access Key: XXXXX     --> Enter details
Secret Key: XXXXX     --> Enter details
Default Region [US]: us-east-1

Encryption password is used to protect your files from reading
by unauthorized persons while in transfer to S3
Encryption password: 
Path to GPG program [/usr/bin/gpg]: 

When using secure HTTPS protocol all communication with Amazon S3
servers is protected from 3rd party eavesdropping. This method is
slower than plain HTTP, and can only be proxied with Python 2.7 or newer
Use HTTPS protocol [Yes]: no

On some networks all internet access must go through a HTTP proxy.
Try setting it here if you can't connect to S3 directly
HTTP Proxy server name:  

New settings:
  Access Key: XXXXX
  Secret Key: XXXXX
  Default Region: us-east-1
  Encryption password: 
  Path to GPG program: /usr/bin/gpg
  Use HTTPS protocol: False
  HTTP Proxy server name: 
  HTTP Proxy server port: 0

Test access with supplied credentials? [Y/n] Y
Please wait, attempting to list all buckets...
Success. Your access key and secret key worked fine :-)

Now verifying that encryption works...
Not configured. Never mind.

Save settings? [y/N] y
Configuration saved to '/root/.s3cfg'
[root@node113rhel67 ~]#

Example of my configuration file:

[root@node113rhel67 ~]# cat /root/.s3cfg
[default]
access_key = XXXXX
access_token = 
add_encoding_exts = 
add_headers = 
bucket_location = us-east-1
ca_certs_file = 
cache_file = 
check_ssl_certificate = True
check_ssl_hostname = True
cloudfront_host = cloudfront.amazonaws.com
default_mime_type = binary/octet-stream
delay_updates = False
delete_after = False
delete_after_fetch = False
delete_removed = False
dry_run = False
enable_multipart = True
encoding = UTF-8
encrypt = False
expiry_date = 
expiry_days = 
expiry_prefix = 
follow_symlinks = False
force = False
get_continue = False
gpg_command = /usr/bin/gpg
gpg_decrypt = %(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s
gpg_encrypt = %(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s
gpg_passphrase = 
guess_mime_type = True
host_base = s3.amazonaws.com
host_bucket = %(bucket)s.s3.amazonaws.com
human_readable_sizes = False
invalidate_default_index_on_cf = False
invalidate_default_index_root_on_cf = True
invalidate_on_cf = False
kms_key = 
limitrate = 0
list_md5 = False
log_target_prefix = 
long_listing = False
max_delete = -1
mime_type = 
multipart_chunk_size_mb = 15
multipart_max_chunks = 10000
preserve_attrs = True
progress_meter = True
proxy_host = 
proxy_port = 0
put_continue = False
recursive = False
recv_chunk = 65536
reduced_redundancy = False
requester_pays = False
restore_days = 1
secret_key = XXXXX
send_chunk = 65536
server_side_encryption = False
signature_v2 = False
simpledb_host = sdb.amazonaws.com
skip_existing = False
socket_timeout = 300
stats = False
stop_on_error = False
storage_class = 
urlencoding_mode = normal
use_https = False
use_mime_magic = True
verbosity = WARNING
website_endpoint = http://%(bucket)s.s3-website-%(location)s.amazonaws.com/
website_error = 
website_index = index.html
[root@node113rhel67 ~]# 

3) Verify connection to the bucket and list the objects.

[root@node113rhel67 ~]# s3cmd ls s3://abizerobjectpooltests
2017-09-29 00:41     65837   s3://abizerobjectpooltests/0.9096de5.881.1
2017-10-05 05:08     65837   s3://abizerobjectpooltests/0.9096de5.881.15
2017-10-04 21:52     65837   s3://abizerobjectpooltests/0.9096de5.881.b
2017-10-04 21:52   1672236   s3://abizerobjectpooltests/0.9096de5.882.1
2017-09-29 00:41   1672236   s3://abizerobjectpooltests/0.9096de5.884.1
2017-10-05 05:08   1363399   s3://abizerobjectpooltests/0.9096de5.884.b
[root@node113rhel67 ~]# 


Verify whether a data file is offloaded or not:

File not offloaded :

[root@node113rhel67 ~]#  hadoop mfs -tieredstatus /objectpool/storagefile3
File has local data.

File is Offloaded :

[root@node113rhel67 ~]#  hadoop mfs -tieredstatus /objectpool/storagefile2
File does not have local data.

[root@node113rhel67 ~]#