[dart] r41457 committed - Update checked in version of gsutil to version 4.6...

da...@googlecode.com

Nov 3, 2014, 7:42:32 AM
to com...@dartlang.org
Revision: 41457
Author: ri...@google.com
Date: Mon Nov 3 12:38:28 2014 UTC
Log: Update checked in version of gsutil to version 4.6

R=whe...@google.com

Review URL: https://codereview.chromium.org//698893003
https://code.google.com/p/dart/source/detail?r=41457

Added:
/third_party/gsutil/gslib/addlhelp/apis.py
/third_party/gsutil/gslib/addlhelp/retries.py
/third_party/gsutil/gslib/boto_resumable_upload.py
/third_party/gsutil/gslib/boto_translation.py
/third_party/gsutil/gslib/cat_helper.py
/third_party/gsutil/gslib/cloud_api.py
/third_party/gsutil/gslib/cloud_api_delegator.py
/third_party/gsutil/gslib/cloud_api_helper.py
/third_party/gsutil/gslib/commands/hash.py
/third_party/gsutil/gslib/commands/rsync.py
/third_party/gsutil/gslib/commands/signurl.py
/third_party/gsutil/gslib/copy_helper.py
/third_party/gsutil/gslib/cs_api_map.py
/third_party/gsutil/gslib/daisy_chain_wrapper.py
/third_party/gsutil/gslib/gcs_json_api.py
/third_party/gsutil/gslib/gcs_json_media.py
/third_party/gsutil/gslib/hashing_helper.py
/third_party/gsutil/gslib/ls_helper.py
/third_party/gsutil/gslib/no_op_credentials.py
/third_party/gsutil/gslib/progress_callback.py
/third_party/gsutil/gslib/storage_url.py
/third_party/gsutil/gslib/tests/mock_cloud_api.py
/third_party/gsutil/gslib/tests/mock_logging_handler.py
/third_party/gsutil/gslib/tests/test_bucketconfig.py
/third_party/gsutil/gslib/tests/test_copy_helper_funcs.py
/third_party/gsutil/gslib/tests/test_creds_config.py
/third_party/gsutil/gslib/tests/test_data/test.p12
/third_party/gsutil/gslib/tests/test_data/test.txt
/third_party/gsutil/gslib/tests/test_hash.py
/third_party/gsutil/gslib/tests/test_hashing_helper.py
/third_party/gsutil/gslib/tests/test_mb.py
/third_party/gsutil/gslib/tests/test_parallel_cp.py
/third_party/gsutil/gslib/tests/test_rb.py
/third_party/gsutil/gslib/tests/test_rsync.py
/third_party/gsutil/gslib/tests/test_rsync_funcs.py
/third_party/gsutil/gslib/tests/test_signurl.py
/third_party/gsutil/gslib/third_party/protorpc
/third_party/gsutil/gslib/third_party/protorpc/__init__.py
/third_party/gsutil/gslib/third_party/protorpc/message_types.py
/third_party/gsutil/gslib/third_party/protorpc/messages.py
/third_party/gsutil/gslib/third_party/protorpc/protojson.py
/third_party/gsutil/gslib/third_party/protorpc/util.py
/third_party/gsutil/gslib/third_party/storage_apitools
/third_party/gsutil/gslib/third_party/storage_apitools/__init__.py
/third_party/gsutil/gslib/third_party/storage_apitools/base_api.py
/third_party/gsutil/gslib/third_party/storage_apitools/credentials_lib.py
/third_party/gsutil/gslib/third_party/storage_apitools/encoding.py
/third_party/gsutil/gslib/third_party/storage_apitools/exceptions.py
/third_party/gsutil/gslib/third_party/storage_apitools/extra_types.py
/third_party/gsutil/gslib/third_party/storage_apitools/http_wrapper.py
/third_party/gsutil/gslib/third_party/storage_apitools/storage_v1_client.py

/third_party/gsutil/gslib/third_party/storage_apitools/storage_v1_messages.py
/third_party/gsutil/gslib/third_party/storage_apitools/stream_slice.py
/third_party/gsutil/gslib/third_party/storage_apitools/transfer.py
/third_party/gsutil/gslib/third_party/storage_apitools/util.py
/third_party/gsutil/gslib/translation_helper.py
/third_party/gsutil/third_party/boto/boto/cloudsearch2
/third_party/gsutil/third_party/boto/boto/cloudsearch2/__init__.py
/third_party/gsutil/third_party/boto/boto/cloudsearch2/document.py
/third_party/gsutil/third_party/boto/boto/cloudsearch2/domain.py
/third_party/gsutil/third_party/boto/boto/cloudsearch2/exceptions.py
/third_party/gsutil/third_party/boto/boto/cloudsearch2/layer1.py
/third_party/gsutil/third_party/boto/boto/cloudsearch2/layer2.py
/third_party/gsutil/third_party/boto/boto/cloudsearch2/optionstatus.py
/third_party/gsutil/third_party/boto/boto/cloudsearch2/search.py
/third_party/gsutil/third_party/boto/boto/ec2/instancetype.py
/third_party/gsutil/third_party/boto/boto/endpoints.json
/third_party/gsutil/third_party/boto/boto/rds/logfile.py
/third_party/gsutil/third_party/boto/boto/rds2
/third_party/gsutil/third_party/boto/boto/rds2/__init__.py
/third_party/gsutil/third_party/boto/boto/rds2/exceptions.py
/third_party/gsutil/third_party/boto/boto/rds2/layer1.py
/third_party/gsutil/third_party/boto/boto/requestlog.py
/third_party/gsutil/third_party/boto/boto/route53/healthcheck.py
/third_party/gsutil/third_party/boto/boto/sqs/messageattributes.py
/third_party/gsutil/third_party/boto/scripts/rebuild_endpoints.py
/third_party/gsutil/third_party/boto/tests/integration/cloudsearch2

/third_party/gsutil/third_party/boto/tests/integration/cloudsearch2/__init__.py

/third_party/gsutil/third_party/boto/tests/integration/cloudsearch2/test_cert_verification.py

/third_party/gsutil/third_party/boto/tests/integration/cloudsearch2/test_layers.py

/third_party/gsutil/third_party/boto/tests/integration/cloudtrail/test_cert_verification.py

/third_party/gsutil/third_party/boto/tests/integration/datapipeline/__init__.py

/third_party/gsutil/third_party/boto/tests/integration/datapipeline/test_cert_verification.py

/third_party/gsutil/third_party/boto/tests/integration/kinesis/test_cert_verification.py
/third_party/gsutil/third_party/boto/tests/integration/rds2
/third_party/gsutil/third_party/boto/tests/integration/rds2/__init__.py

/third_party/gsutil/third_party/boto/tests/integration/rds2/test_cert_verification.py

/third_party/gsutil/third_party/boto/tests/integration/rds2/test_connection.py

/third_party/gsutil/third_party/boto/tests/integration/route53/test_alias_resourcerecordsets.py

/third_party/gsutil/third_party/boto/tests/integration/route53/test_health_check.py

/third_party/gsutil/third_party/boto/tests/unit/cloudfront/test_connection.py
/third_party/gsutil/third_party/boto/tests/unit/cloudsearch2
/third_party/gsutil/third_party/boto/tests/unit/cloudsearch2/__init__.py

/third_party/gsutil/third_party/boto/tests/unit/cloudsearch2/test_connection.py

/third_party/gsutil/third_party/boto/tests/unit/cloudsearch2/test_document.py

/third_party/gsutil/third_party/boto/tests/unit/cloudsearch2/test_exceptions.py
/third_party/gsutil/third_party/boto/tests/unit/cloudsearch2/test_search.py
/third_party/gsutil/third_party/boto/tests/unit/ec2/test_ec2object.py
/third_party/gsutil/third_party/boto/tests/unit/ec2/test_instancetype.py
/third_party/gsutil/third_party/boto/tests/unit/ecs
/third_party/gsutil/third_party/boto/tests/unit/ecs/__init__.py
/third_party/gsutil/third_party/boto/tests/unit/ecs/test_connection.py
/third_party/gsutil/third_party/boto/tests/unit/rds2
/third_party/gsutil/third_party/boto/tests/unit/rds2/__init__.py
/third_party/gsutil/third_party/boto/tests/unit/rds2/test_connection.py
/third_party/gsutil/third_party/boto/tests/unit/swf/test_layer2_base.py
/third_party/gsutil/third_party/boto/tests/unit/test_endpoints.json
/third_party/gsutil/third_party/boto/tests/unit/test_regioninfo.py
/third_party/gsutil/third_party/gcs-oauth2-boto-plugin
/third_party/gsutil/third_party/gcs-oauth2-boto-plugin/.gitignore
/third_party/gsutil/third_party/gcs-oauth2-boto-plugin/COPYING
/third_party/gsutil/third_party/gcs-oauth2-boto-plugin/README.md

/third_party/gsutil/third_party/gcs-oauth2-boto-plugin/gcs_oauth2_boto_plugin

/third_party/gsutil/third_party/gcs-oauth2-boto-plugin/gcs_oauth2_boto_plugin/__init__.py

/third_party/gsutil/third_party/gcs-oauth2-boto-plugin/gcs_oauth2_boto_plugin/oauth2_client.py

/third_party/gsutil/third_party/gcs-oauth2-boto-plugin/gcs_oauth2_boto_plugin/oauth2_helper.py

/third_party/gsutil/third_party/gcs-oauth2-boto-plugin/gcs_oauth2_boto_plugin/oauth2_plugin.py

/third_party/gsutil/third_party/gcs-oauth2-boto-plugin/gcs_oauth2_boto_plugin/test_oauth2_client.py
/third_party/gsutil/third_party/gcs-oauth2-boto-plugin/requirements.txt
/third_party/gsutil/third_party/gcs-oauth2-boto-plugin/setup.py

/third_party/gsutil/third_party/gcs-oauth2-boto-plugin/test-requirements.txt
Deleted:
/third_party/gsutil/LICENSE.third_party
/third_party/gsutil/gslib/tests/test_cp_funcs.py
/third_party/gsutil/gslib/tests/test_oauth2_client.py
/third_party/gsutil/gslib/tests/test_thread_pool.py
/third_party/gsutil/gslib/third_party/oauth2_plugin
/third_party/gsutil/gslib/thread_pool.py
/third_party/gsutil/scripts
/third_party/gsutil/third_party/boto/boto/core
/third_party/gsutil/third_party/boto/boto/ec2/vmtype.py
Modified:
/third_party/gsutil/CHANGES.md
/third_party/gsutil/CHECKSUM
/third_party/gsutil/MANIFEST.in
/third_party/gsutil/README.md
/third_party/gsutil/VERSION
/third_party/gsutil/gslib/__init__.py
/third_party/gsutil/gslib/__main__.py
/third_party/gsutil/gslib/aclhelpers.py
/third_party/gsutil/gslib/addlhelp/__init__.py
/third_party/gsutil/gslib/addlhelp/acls.py
/third_party/gsutil/gslib/addlhelp/anon.py
/third_party/gsutil/gslib/addlhelp/command_opts.py
/third_party/gsutil/gslib/addlhelp/crc32c.py
/third_party/gsutil/gslib/addlhelp/creds.py
/third_party/gsutil/gslib/addlhelp/dev.py
/third_party/gsutil/gslib/addlhelp/metadata.py
/third_party/gsutil/gslib/addlhelp/naming.py
/third_party/gsutil/gslib/addlhelp/prod.py
/third_party/gsutil/gslib/addlhelp/projects.py
/third_party/gsutil/gslib/addlhelp/subdirs.py
/third_party/gsutil/gslib/addlhelp/support.py
/third_party/gsutil/gslib/addlhelp/versions.py
/third_party/gsutil/gslib/addlhelp/wildcards.py
/third_party/gsutil/gslib/bucket_listing_ref.py
/third_party/gsutil/gslib/command.py
/third_party/gsutil/gslib/command_runner.py
/third_party/gsutil/gslib/commands/__init__.py
/third_party/gsutil/gslib/commands/acl.py
/third_party/gsutil/gslib/commands/cat.py
/third_party/gsutil/gslib/commands/compose.py
/third_party/gsutil/gslib/commands/config.py
/third_party/gsutil/gslib/commands/cors.py
/third_party/gsutil/gslib/commands/cp.py
/third_party/gsutil/gslib/commands/defacl.py
/third_party/gsutil/gslib/commands/du.py
/third_party/gsutil/gslib/commands/help.py
/third_party/gsutil/gslib/commands/lifecycle.py
/third_party/gsutil/gslib/commands/logging.py
/third_party/gsutil/gslib/commands/ls.py
/third_party/gsutil/gslib/commands/mb.py
/third_party/gsutil/gslib/commands/mv.py
/third_party/gsutil/gslib/commands/notification.py
/third_party/gsutil/gslib/commands/perfdiag.py
/third_party/gsutil/gslib/commands/rb.py
/third_party/gsutil/gslib/commands/rm.py
/third_party/gsutil/gslib/commands/setmeta.py
/third_party/gsutil/gslib/commands/stat.py
/third_party/gsutil/gslib/commands/test.py
/third_party/gsutil/gslib/commands/update.py
/third_party/gsutil/gslib/commands/version.py
/third_party/gsutil/gslib/commands/versioning.py
/third_party/gsutil/gslib/commands/web.py
/third_party/gsutil/gslib/cred_types.py
/third_party/gsutil/gslib/exception.py
/third_party/gsutil/gslib/file_part.py
/third_party/gsutil/gslib/help_provider.py
/third_party/gsutil/gslib/name_expansion.py
/third_party/gsutil/gslib/no_op_auth_plugin.py
/third_party/gsutil/gslib/parallelism_framework_util.py
/third_party/gsutil/gslib/plurality_checkable_iterator.py
/third_party/gsutil/gslib/project_id.py
/third_party/gsutil/gslib/storage_uri_builder.py
/third_party/gsutil/gslib/tests/__init__.py
/third_party/gsutil/gslib/tests/test_Doption.py
/third_party/gsutil/gslib/tests/test_acl.py
/third_party/gsutil/gslib/tests/test_cat.py
/third_party/gsutil/gslib/tests/test_command_runner.py
/third_party/gsutil/gslib/tests/test_compose.py
/third_party/gsutil/gslib/tests/test_cors.py
/third_party/gsutil/gslib/tests/test_cp.py
/third_party/gsutil/gslib/tests/test_defacl.py
/third_party/gsutil/gslib/tests/test_du.py
/third_party/gsutil/gslib/tests/test_file_part.py
/third_party/gsutil/gslib/tests/test_gsutil.py
/third_party/gsutil/gslib/tests/test_help.py
/third_party/gsutil/gslib/tests/test_lifecycle.py
/third_party/gsutil/gslib/tests/test_logging.py
/third_party/gsutil/gslib/tests/test_ls.py
/third_party/gsutil/gslib/tests/test_mv.py
/third_party/gsutil/gslib/tests/test_naming.py
/third_party/gsutil/gslib/tests/test_notification.py
/third_party/gsutil/gslib/tests/test_parallelism_framework.py
/third_party/gsutil/gslib/tests/test_perfdiag.py
/third_party/gsutil/gslib/tests/test_plurality_checkable_iterator.py
/third_party/gsutil/gslib/tests/test_rm.py
/third_party/gsutil/gslib/tests/test_setmeta.py
/third_party/gsutil/gslib/tests/test_stat.py
/third_party/gsutil/gslib/tests/test_update.py
/third_party/gsutil/gslib/tests/test_util.py
/third_party/gsutil/gslib/tests/test_versioning.py
/third_party/gsutil/gslib/tests/test_web.py
/third_party/gsutil/gslib/tests/test_wildcard_iterator.py
/third_party/gsutil/gslib/tests/testcase/__init__.py
/third_party/gsutil/gslib/tests/testcase/base.py
/third_party/gsutil/gslib/tests/testcase/integration_testcase.py
/third_party/gsutil/gslib/tests/testcase/unit_testcase.py
/third_party/gsutil/gslib/tests/util.py
/third_party/gsutil/gslib/util.py
/third_party/gsutil/gslib/wildcard_iterator.py
/third_party/gsutil/gsutil
/third_party/gsutil/gsutil.py
/third_party/gsutil/setup.py
/third_party/gsutil/third_party/boto/.travis.yml
/third_party/gsutil/third_party/boto/CONTRIBUTING
/third_party/gsutil/third_party/boto/MANIFEST.in
/third_party/gsutil/third_party/boto/README.rst
/third_party/gsutil/third_party/boto/bin/cq
/third_party/gsutil/third_party/boto/bin/elbadmin
/third_party/gsutil/third_party/boto/bin/glacier
/third_party/gsutil/third_party/boto/bin/lss3
/third_party/gsutil/third_party/boto/bin/mturk
/third_party/gsutil/third_party/boto/bin/route53
/third_party/gsutil/third_party/boto/bin/s3put
/third_party/gsutil/third_party/boto/bin/sdbadmin
/third_party/gsutil/third_party/boto/boto/__init__.py
/third_party/gsutil/third_party/boto/boto/auth.py
/third_party/gsutil/third_party/boto/boto/beanstalk/__init__.py
/third_party/gsutil/third_party/boto/boto/beanstalk/layer1.py
/third_party/gsutil/third_party/boto/boto/cloudformation/__init__.py
/third_party/gsutil/third_party/boto/boto/cloudformation/connection.py
/third_party/gsutil/third_party/boto/boto/cloudformation/stack.py
/third_party/gsutil/third_party/boto/boto/cloudformation/template.py
/third_party/gsutil/third_party/boto/boto/cloudfront/__init__.py
/third_party/gsutil/third_party/boto/boto/cloudfront/distribution.py
/third_party/gsutil/third_party/boto/boto/cloudsearch/__init__.py
/third_party/gsutil/third_party/boto/boto/cloudsearch/layer1.py
/third_party/gsutil/third_party/boto/boto/cloudtrail/__init__.py
/third_party/gsutil/third_party/boto/boto/compat.py
/third_party/gsutil/third_party/boto/boto/connection.py
/third_party/gsutil/third_party/boto/boto/datapipeline/__init__.py
/third_party/gsutil/third_party/boto/boto/datapipeline/layer1.py
/third_party/gsutil/third_party/boto/boto/directconnect/__init__.py
/third_party/gsutil/third_party/boto/boto/dynamodb/__init__.py
/third_party/gsutil/third_party/boto/boto/dynamodb/layer1.py
/third_party/gsutil/third_party/boto/boto/dynamodb/layer2.py
/third_party/gsutil/third_party/boto/boto/dynamodb2/__init__.py
/third_party/gsutil/third_party/boto/boto/dynamodb2/exceptions.py
/third_party/gsutil/third_party/boto/boto/dynamodb2/fields.py
/third_party/gsutil/third_party/boto/boto/dynamodb2/layer1.py
/third_party/gsutil/third_party/boto/boto/dynamodb2/results.py
/third_party/gsutil/third_party/boto/boto/dynamodb2/table.py
/third_party/gsutil/third_party/boto/boto/ec2/__init__.py
/third_party/gsutil/third_party/boto/boto/ec2/address.py
/third_party/gsutil/third_party/boto/boto/ec2/autoscale/__init__.py
/third_party/gsutil/third_party/boto/boto/ec2/autoscale/group.py
/third_party/gsutil/third_party/boto/boto/ec2/autoscale/launchconfig.py
/third_party/gsutil/third_party/boto/boto/ec2/autoscale/policy.py
/third_party/gsutil/third_party/boto/boto/ec2/blockdevicemapping.py
/third_party/gsutil/third_party/boto/boto/ec2/cloudwatch/__init__.py
/third_party/gsutil/third_party/boto/boto/ec2/connection.py
/third_party/gsutil/third_party/boto/boto/ec2/ec2object.py
/third_party/gsutil/third_party/boto/boto/ec2/elb/__init__.py
/third_party/gsutil/third_party/boto/boto/ec2/elb/attributes.py
/third_party/gsutil/third_party/boto/boto/ec2/elb/loadbalancer.py
/third_party/gsutil/third_party/boto/boto/ec2/image.py
/third_party/gsutil/third_party/boto/boto/ec2/networkinterface.py
/third_party/gsutil/third_party/boto/boto/ec2/regioninfo.py
/third_party/gsutil/third_party/boto/boto/ec2/snapshot.py
/third_party/gsutil/third_party/boto/boto/ec2/volume.py
/third_party/gsutil/third_party/boto/boto/ecs/__init__.py
/third_party/gsutil/third_party/boto/boto/ecs/item.py
/third_party/gsutil/third_party/boto/boto/elasticache/__init__.py
/third_party/gsutil/third_party/boto/boto/elastictranscoder/__init__.py
/third_party/gsutil/third_party/boto/boto/emr/__init__.py
/third_party/gsutil/third_party/boto/boto/emr/connection.py
/third_party/gsutil/third_party/boto/boto/emr/emrobject.py
/third_party/gsutil/third_party/boto/boto/exception.py
/third_party/gsutil/third_party/boto/boto/fps/response.py
/third_party/gsutil/third_party/boto/boto/glacier/__init__.py
/third_party/gsutil/third_party/boto/boto/glacier/layer1.py
/third_party/gsutil/third_party/boto/boto/glacier/vault.py
/third_party/gsutil/third_party/boto/boto/gs/bucketlistresultset.py
/third_party/gsutil/third_party/boto/boto/gs/connection.py
/third_party/gsutil/third_party/boto/boto/gs/key.py
/third_party/gsutil/third_party/boto/boto/gs/user.py
/third_party/gsutil/third_party/boto/boto/https_connection.py
/third_party/gsutil/third_party/boto/boto/iam/__init__.py
/third_party/gsutil/third_party/boto/boto/iam/connection.py
/third_party/gsutil/third_party/boto/boto/kinesis/__init__.py
/third_party/gsutil/third_party/boto/boto/kinesis/layer1.py
/third_party/gsutil/third_party/boto/boto/mturk/connection.py
/third_party/gsutil/third_party/boto/boto/mws/connection.py
/third_party/gsutil/third_party/boto/boto/mws/exception.py
/third_party/gsutil/third_party/boto/boto/mws/response.py
/third_party/gsutil/third_party/boto/boto/opsworks/__init__.py
/third_party/gsutil/third_party/boto/boto/opsworks/layer1.py
/third_party/gsutil/third_party/boto/boto/provider.py
/third_party/gsutil/third_party/boto/boto/pyami/config.py
/third_party/gsutil/third_party/boto/boto/rds/__init__.py
/third_party/gsutil/third_party/boto/boto/rds/regioninfo.py
/third_party/gsutil/third_party/boto/boto/redshift/__init__.py
/third_party/gsutil/third_party/boto/boto/regioninfo.py
/third_party/gsutil/third_party/boto/boto/roboto/awsqueryservice.py
/third_party/gsutil/third_party/boto/boto/roboto/param.py
/third_party/gsutil/third_party/boto/boto/route53/__init__.py
/third_party/gsutil/third_party/boto/boto/route53/connection.py
/third_party/gsutil/third_party/boto/boto/route53/record.py
/third_party/gsutil/third_party/boto/boto/route53/zone.py
/third_party/gsutil/third_party/boto/boto/s3/__init__.py
/third_party/gsutil/third_party/boto/boto/s3/acl.py
/third_party/gsutil/third_party/boto/boto/s3/bucket.py
/third_party/gsutil/third_party/boto/boto/s3/connection.py
/third_party/gsutil/third_party/boto/boto/s3/key.py
/third_party/gsutil/third_party/boto/boto/s3/lifecycle.py
/third_party/gsutil/third_party/boto/boto/sdb/__init__.py
/third_party/gsutil/third_party/boto/boto/sdb/connection.py
/third_party/gsutil/third_party/boto/boto/sdb/regioninfo.py
/third_party/gsutil/third_party/boto/boto/ses/__init__.py
/third_party/gsutil/third_party/boto/boto/ses/connection.py
/third_party/gsutil/third_party/boto/boto/sns/__init__.py
/third_party/gsutil/third_party/boto/boto/sns/connection.py
/third_party/gsutil/third_party/boto/boto/sqs/__init__.py
/third_party/gsutil/third_party/boto/boto/sqs/connection.py
/third_party/gsutil/third_party/boto/boto/sqs/message.py
/third_party/gsutil/third_party/boto/boto/sqs/queue.py
/third_party/gsutil/third_party/boto/boto/sqs/regioninfo.py
/third_party/gsutil/third_party/boto/boto/sts/__init__.py
/third_party/gsutil/third_party/boto/boto/sts/connection.py
/third_party/gsutil/third_party/boto/boto/support/__init__.py
/third_party/gsutil/third_party/boto/boto/swf/__init__.py
/third_party/gsutil/third_party/boto/boto/swf/layer1.py
/third_party/gsutil/third_party/boto/boto/swf/layer2.py
/third_party/gsutil/third_party/boto/boto/utils.py
/third_party/gsutil/third_party/boto/boto/vpc/__init__.py
/third_party/gsutil/third_party/boto/setup.cfg
/third_party/gsutil/third_party/boto/setup.py

/third_party/gsutil/third_party/boto/tests/integration/cloudformation/test_connection.py

/third_party/gsutil/third_party/boto/tests/integration/dynamodb2/test_highlevel.py

/third_party/gsutil/third_party/boto/tests/integration/ec2/elb/test_connection.py

/third_party/gsutil/third_party/boto/tests/integration/ec2/vpc/test_connection.py
/third_party/gsutil/third_party/boto/tests/integration/gs/test_basic.py

/third_party/gsutil/third_party/boto/tests/integration/kinesis/test_kinesis.py
/third_party/gsutil/third_party/boto/tests/integration/mws/test.py

/third_party/gsutil/third_party/boto/tests/integration/opsworks/test_layer1.py
/third_party/gsutil/third_party/boto/tests/integration/route53/__init__.py

/third_party/gsutil/third_party/boto/tests/integration/route53/test_cert_verification.py

/third_party/gsutil/third_party/boto/tests/integration/route53/test_resourcerecordsets.py
/third_party/gsutil/third_party/boto/tests/integration/route53/test_zone.py
/third_party/gsutil/third_party/boto/tests/integration/s3/test_bucket.py

/third_party/gsutil/third_party/boto/tests/integration/s3/test_connection.py
/third_party/gsutil/third_party/boto/tests/integration/s3/test_key.py
/third_party/gsutil/third_party/boto/tests/integration/s3/test_multipart.py

/third_party/gsutil/third_party/boto/tests/integration/sts/test_session_token.py
/third_party/gsutil/third_party/boto/tests/mturk/reviewable_hits.doctest
/third_party/gsutil/third_party/boto/tests/unit/__init__.py
/third_party/gsutil/third_party/boto/tests/unit/auth/test_sigv4.py
/third_party/gsutil/third_party/boto/tests/unit/beanstalk/test_layer1.py

/third_party/gsutil/third_party/boto/tests/unit/cloudformation/test_connection.py

/third_party/gsutil/third_party/boto/tests/unit/cloudformation/test_stack.py
/third_party/gsutil/third_party/boto/tests/unit/dynamodb2/test_table.py
/third_party/gsutil/third_party/boto/tests/unit/ec2/autoscale/test_group.py
/third_party/gsutil/third_party/boto/tests/unit/ec2/test_address.py

/third_party/gsutil/third_party/boto/tests/unit/ec2/test_blockdevicemapping.py
/third_party/gsutil/third_party/boto/tests/unit/ec2/test_connection.py

/third_party/gsutil/third_party/boto/tests/unit/ec2/test_networkinterface.py
/third_party/gsutil/third_party/boto/tests/unit/ec2/test_snapshot.py
/third_party/gsutil/third_party/boto/tests/unit/ec2/test_volume.py
/third_party/gsutil/third_party/boto/tests/unit/emr/test_connection.py
/third_party/gsutil/third_party/boto/tests/unit/glacier/test_concurrent.py
/third_party/gsutil/third_party/boto/tests/unit/glacier/test_layer2.py
/third_party/gsutil/third_party/boto/tests/unit/iam/test_connection.py
/third_party/gsutil/third_party/boto/tests/unit/mws/test_connection.py
/third_party/gsutil/third_party/boto/tests/unit/mws/test_response.py
/third_party/gsutil/third_party/boto/tests/unit/provider/test_provider.py
/third_party/gsutil/third_party/boto/tests/unit/rds/test_connection.py
/third_party/gsutil/third_party/boto/tests/unit/route53/test_connection.py
/third_party/gsutil/third_party/boto/tests/unit/s3/test_bucket.py
/third_party/gsutil/third_party/boto/tests/unit/s3/test_connection.py
/third_party/gsutil/third_party/boto/tests/unit/ses/test_identity.py
/third_party/gsutil/third_party/boto/tests/unit/sns/test_connection.py
/third_party/gsutil/third_party/boto/tests/unit/sqs/test_connection.py
/third_party/gsutil/third_party/boto/tests/unit/sts/test_connection.py
/third_party/gsutil/third_party/boto/tests/unit/test_connection.py
/third_party/gsutil/third_party/boto/tests/unit/utils/test_utils.py

=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/addlhelp/apis.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Additional help about gsutil's interaction with Cloud Storage APIs."""
+
+from gslib.help_provider import HelpProvider
+
+_DETAILED_HELP_TEXT = ("""
+<B>OVERVIEW</B>
+ Google Cloud Storage offers two APIs: an XML and a JSON API. Gsutil can
+ interact with both APIs. By default, gsutil versions starting with 4.0
+ interact with the JSON API. If it is not possible to perform a command using
+ one of the APIs (for example, the notification command is not supported in
+ the XML API), gsutil will silently fall back to using the other API. Also,
+ gsutil will automatically fall back to using the XML API when interacting
+ with cloud storage providers that only support that API.
+
+<B>CONFIGURING WHICH API IS USED</B>
+ To use a certain API for interacting with Google Cloud Storage, you can set
+ the 'prefer_api' variable in the "GSUtil" section of .boto config file to
+ 'xml' or 'json' like so:
+
+ prefer_api = json
+
+ This will cause gsutil to use that API where possible (falling back to the
+ other API in cases as noted above). This applies to the gsutil test command
+ as well; it will run integration tests against the preferred API.
+
+<B>PERFORMANCE DIFFERENCES BETWEEN APIS</B>
+ The XML API uses the boto framework. This framework re-reads downloaded files
+ to compute an MD5 hash if one is not present. For objects that do not
+ include MD5 hashes in their metadata (for example Google Cloud Storage
+ composite objects), this doubles the bandwidth consumed and elapsed time
+ needed by the download. Therefore, if you are working with composite objects,
+ it is recommended that you use the default value for prefer_api.
+""")
+
+
+class CommandOptions(HelpProvider):
+ """Additional help about gsutil's interaction with Cloud Storage APIs."""
+
+ # Help specification. See help_provider.py for documentation.
+ help_spec = HelpProvider.HelpSpec(
+ help_name='apis',
+ help_name_aliases=['XML', 'JSON', 'api', 'force_api', 'prefer_api'],
+ help_type='additional_help',
+ help_one_line_summary='Cloud Storage APIs',
+ help_text=_DETAILED_HELP_TEXT,
+ subcommand_help_text={},
+ )
+
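
The 'prefer_api' value documented above is an ordinary boto config setting, so
it can be read programmatically the same way gsutil reads other [GSUtil]
options. A minimal sketch, assuming boto's standard config object and an
illustrative 'json' fallback (not part of this commit):

    import boto

    # boto.config is a ConfigParser-like object loaded from the .boto file;
    # the third argument is the value returned when the setting is absent.
    prefer_api = boto.config.get('GSUtil', 'prefer_api', 'json')
    print('Preferred Cloud Storage API: %s' % prefer_api)
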
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/addlhelp/retries.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Additional help text about retry handling."""
+
+from gslib.help_provider import HelpProvider
+
+_DETAILED_HELP_TEXT = ("""
+<B>RETRY STRATEGY</B>
+ There are a number of reasons that gsutil operations can fail; some are not
+ retryable, and require that the user take some action, for example:
+ - Invalid credentials
+ - Network unreachable because of a proxy configuration problem
+ - Access denied, because the bucket or object you are trying to use has an
+ ACL that doesn't permit the action you're trying to perform.
+
+ In other cases errors are retryable - basically, the HTTP 5xx error codes. For
+ these cases, gsutil will retry using a truncated binary exponential backoff
+ strategy:
+
+ - Wait a random period between [0..1] seconds and retry;
+ - If that fails, wait a random period between [0..2] seconds and retry;
+ - If that fails, wait a random period between [0..4] seconds and retry;
+ - And so on, up to a configurable maximum number of retries (default = 6).
+
+ Thus, by default, gsutil will retry 6 times over 1+2+4+8+16+32=63 seconds.
+ You can adjust the number of retries and maximum delay of any individual retry
+ by editing the num_retries and max_retry_delay configuration variables in the
+ "[Boto]" section of the .boto config file. Most users shouldn't need to change
+ these values.
+
+ For data transfers (the gsutil cp and rsync commands), gsutil provides
+ additional retry functionality, in the form of resumable transfers.
+ Essentially, a transfer that was interrupted because of a transient error
+ can be restarted without starting over from scratch. For more details
+ about this, see the "RESUMABLE TRANSFERS" section of "gsutil help cp".
+""")
+
+
+class CommandOptions(HelpProvider):
+ """Additional help text about retry handling."""
+
+ # Help specification. See help_provider.py for documentation.
+ help_spec = HelpProvider.HelpSpec(
+ help_name='retries',
+ help_name_aliases=['retry', 'backoff'],
+ help_type='additional_help',
+ help_one_line_summary='Retry Handling Strategy',
+ help_text=_DETAILED_HELP_TEXT,
+ subcommand_help_text={},
+ )
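
The backoff described in this help text is the same truncated binary
exponential backoff implemented later in this commit (see
TrackProgressLessIterations in boto_resumable_upload.py below). A minimal
standalone sketch, assuming a 32-second cap comparable to the default
max_retry_delay:

    import random
    import time

    def backoff_sleep(progress_less_iterations, max_retry_delay=32.0):
        # Wait a random period in [0, 2**n] seconds, truncated at the cap.
        sleep_secs = min(random.random() * (2 ** progress_less_iterations),
                         max_retry_delay)
        time.sleep(sleep_secs)
        return sleep_secs
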
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/boto_resumable_upload.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,580 @@
+# -*- coding: utf-8 -*-
+# Copyright 2010 Google Inc. All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish, dis-
+# tribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the fol-
+# lowing conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+"""Boto translation layer for resumable uploads.
+
+See http://code.google.com/apis/storage/docs/developer-guide.html#resumable
+for details.
+
+Resumable uploads will retry interrupted uploads, resuming at the byte
+count completed by the last upload attempt. If too many retries happen with
+no progress (per configurable num_retries param), the upload will be
+aborted in the current process.
+
+Unlike the boto implementation of resumable upload handler, this class does
+not directly interact with tracker files.
+
+Originally Google wrote and contributed this code to the boto project,
+then copied that code back into gsutil on the release of gsutil 4.0 which
+supports both boto and non-boto codepaths for resumable uploads. Any bug
+fixes made to this file should also be integrated to resumable_upload_handler.py
+in boto, where applicable.
+
+TODO: gsutil-beta: Add a similar comment to the boto code.
+"""
+
+from __future__ import absolute_import
+
+import errno
+import httplib
+import random
+import re
+import socket
+import time
+import urlparse
+from boto import UserAgent
+from boto.connection import AWSAuthConnection
+from boto.exception import ResumableTransferDisposition
+from boto.exception import ResumableUploadException
+from gslib.exception import InvalidUrlError
+from gslib.util import GetMaxRetryDelay
+from gslib.util import GetNumRetries
+from gslib.util import XML_PROGRESS_CALLBACKS
+
+
+class BotoResumableUpload(object):
+ """Upload helper class for resumable uploads via boto."""
+
+ BUFFER_SIZE = 8192
+ RETRYABLE_EXCEPTIONS = (httplib.HTTPException, IOError, socket.error,
+ socket.gaierror)
+
+ # (start, end) response indicating service has nothing (upload protocol uses
+ # inclusive numbering).
+ SERVICE_HAS_NOTHING = (0, -1)
+
+ def __init__(self, tracker_callback, logger,
+ resume_url=None, num_retries=None):
+ """Constructor. Instantiate once for each uploaded file.
+
+ Args:
+ tracker_callback: Callback function that takes a string argument. Used
+ by caller to track this upload across upload
+ interruption.
+ logger: logging.logger instance to use for debug messages.
+ resume_url: If present, attempt to resume the upload at this URL.
+ num_retries: Number of times to retry the upload making no progress.
+ This count resets every time we make progress, so the upload
+ can span many more than this number of retries.
+ """
+ if resume_url:
+ self._SetUploadUrl(resume_url)
+ else:
+ self.upload_url = None
+ self.num_retries = num_retries
+ self.service_has_bytes = 0 # Byte count at last service check.
+ # Save upload_start_point in instance state so caller can find how
+ # much was transferred by this ResumableUploadHandler (across retries).
+ self.upload_start_point = None
+ self.tracker_callback = tracker_callback
+ self.logger = logger
+
+ def _SetUploadUrl(self, url):
+ """Saves URL and resets upload state.
+
+ Called when we start a new resumable upload or get a new tracker
+ URL for the upload.
+
+ Args:
+ url: URL string for the upload.
+
+ Raises InvalidUrlError if URL is syntactically invalid.
+ """
+ parse_result = urlparse.urlparse(url)
+ if (parse_result.scheme.lower() not in ['http', 'https'] or
+ not parse_result.netloc):
+ raise InvalidUrlError('Invalid upload URL (%s)' % url)
+ self.upload_url = url
+ self.upload_url_host = parse_result.netloc
+ self.upload_url_path = '%s?%s' % (
+ parse_result.path, parse_result.query)
+ self.service_has_bytes = 0
+
+ def _BuildContentRangeHeader(self, range_spec='*', length_spec='*'):
+ return 'bytes %s/%s' % (range_spec, length_spec)
+
+ def _QueryServiceState(self, conn, file_length):
+ """Queries service to find out state of given upload.
+
+ Note that this method really just makes special case use of the
+ fact that the upload service always returns the current start/end
+ state whenever a PUT doesn't complete.
+
+ Args:
+ conn: HTTPConnection to use for the query.
+ file_length: Total length of the file.
+
+ Returns:
+ HTTP response from sending request.
+
+ Raises:
+ ResumableUploadException if problem querying service.
+ """
+ # Send an empty PUT so that service replies with this resumable
+ # transfer's state.
+ put_headers = {}
+ put_headers['Content-Range'] = (
+ self._BuildContentRangeHeader('*', file_length))
+ put_headers['Content-Length'] = '0'
+ return AWSAuthConnection.make_request(
+ conn, 'PUT', path=self.upload_url_path, auth_path=self.upload_url_path,
+ headers=put_headers, host=self.upload_url_host)
+
+ def _QueryServicePos(self, conn, file_length):
+ """Queries service to find out what bytes it currently has.
+
+ Args:
+ conn: HTTPConnection to use for the query.
+ file_length: Total length of the file.
+
+ Returns:
+ (service_start, service_end), where the values are inclusive.
+ For example, (0, 2) would mean that the service has bytes 0, 1, *and* 2.
+
+ Raises:
+ ResumableUploadException if problem querying service.
+ """
+ resp = self._QueryServiceState(conn, file_length)
+ if resp.status == 200:
+ # To handle the boundary condition where the service has the complete
+ # file, we return (service_start, file_length-1). That way the
+ # calling code can always simply read up through service_end. (If we
+ # didn't handle this boundary condition here, the caller would have
+ # to check whether service_end == file_length and read one fewer byte
+ # in that case.)
+ return (0, file_length - 1) # Completed upload.
+ if resp.status != 308:
+ # This means the service didn't have any state for the given
+ # upload ID, which can happen (for example) if the caller saved
+ # the upload URL to a file and then tried to restart the transfer
+ # after that upload ID has gone stale. In that case we need to
+ # start a new transfer (and the caller will then save the new
+ # upload URL to the tracker file).
+ raise ResumableUploadException(
+ 'Got non-308 response (%s) from service state query' %
+ resp.status, ResumableTransferDisposition.START_OVER)
+ got_valid_response = False
+ range_spec = resp.getheader('range')
+ if range_spec:
+ # Parse 'bytes=<from>-<to>' range_spec.
+ m = re.search(r'bytes=(\d+)-(\d+)', range_spec)
+ if m:
+ service_start = long(m.group(1))
+ service_end = long(m.group(2))
+ got_valid_response = True
+ else:
+ # No Range header, which means the service does not yet have
+ # any bytes. Note that the Range header uses inclusive 'from'
+ # and 'to' values. Since Range 0-0 would mean that the service
+ # has byte 0, omitting the Range header is used to indicate that
+ # the service doesn't have any bytes.
+ return self.SERVICE_HAS_NOTHING
+ if not got_valid_response:
+ raise ResumableUploadException(
+ 'Couldn\'t parse upload service state query response (%s)' %
+ str(resp.getheaders()), ResumableTransferDisposition.START_OVER)
+ if conn.debug >= 1:
+ self.logger.debug('Service has: Range: %d - %d.', service_start,
+ service_end)
+ return (service_start, service_end)
+
+ def _StartNewResumableUpload(self, key, headers=None):
+ """Starts a new resumable upload.
+
+ Args:
+ key: Boto Key representing the object to upload.
+ headers: Headers to use in the upload requests.
+
+ Raises:
+ ResumableUploadException if any errors occur.
+ """
+ conn = key.bucket.connection
+ if conn.debug >= 1:
+ self.logger.debug('Starting new resumable upload.')
+ self.service_has_bytes = 0
+
+ # Start a new resumable upload by sending a POST request with an
+ # empty body and the "X-Goog-Resumable: start" header. Include any
+ # caller-provided headers (e.g., Content-Type) EXCEPT Content-Length
+ # (and raise an exception if they tried to pass one, since it's
+ # a semantic error to specify it at this point, and if we were to
+ # include one now it would cause the service to expect that many
+ # bytes; the POST doesn't include the actual file bytes). We set
+ # the Content-Length in the subsequent PUT, based on the uploaded
+ # file size.
+ post_headers = {}
+ for k in headers:
+ if k.lower() == 'content-length':
+ raise ResumableUploadException(
+ 'Attempt to specify Content-Length header (disallowed)',
+ ResumableTransferDisposition.ABORT)
+ post_headers[k] = headers[k]
+ post_headers[conn.provider.resumable_upload_header] = 'start'
+
+ resp = conn.make_request(
+ 'POST', key.bucket.name, key.name, post_headers)
+ # Get upload URL from response 'Location' header.
+ body = resp.read()
+
+ # Check for various status conditions.
+ if resp.status in [500, 503]:
+ # Retry status 500 and 503 errors after a delay.
+ raise ResumableUploadException(
+ 'Got status %d from attempt to start resumable upload. '
+ 'Will wait/retry' % resp.status,
+ ResumableTransferDisposition.WAIT_BEFORE_RETRY)
+ elif resp.status != 200 and resp.status != 201:
+ raise ResumableUploadException(
+ 'Got status %d from attempt to start resumable upload. '
+ 'Aborting' % resp.status,
+ ResumableTransferDisposition.ABORT)
+
+ # Else we got 200 or 201 response code, indicating the resumable
+ # upload was created.
+ upload_url = resp.getheader('Location')
+ if not upload_url:
+ raise ResumableUploadException(
+ 'No resumable upload URL found in resumable initiation '
+ 'POST response (%s)' % body,
+ ResumableTransferDisposition.WAIT_BEFORE_RETRY)
+ self._SetUploadUrl(upload_url)
+ self.tracker_callback(upload_url)
+
+ def _UploadFileBytes(self, conn, http_conn, fp, file_length,
+ total_bytes_uploaded, cb, num_cb, headers):
+ """Attempts to upload file bytes.
+
+ Makes a single attempt using an existing resumable upload connection.
+
+ Args:
+ conn: HTTPConnection from the boto Key.
+ http_conn: Separate HTTPConnection for the transfer.
+ fp: File pointer containing bytes to upload.
+ file_length: Total length of the file.
+ total_bytes_uploaded: The total number of bytes uploaded.
+ cb: Progress callback function that takes (progress, total_size).
+ num_cb: Granularity of the callback (maximum number of times the
+ callback will be called during the file transfer). If negative,
+ perform callback with each buffer read.
+ headers: Headers to be used in the upload requests.
+
+ Returns:
+ (etag, generation, metageneration) from service upon success.
+
+ Raises:
+ ResumableUploadException if any problems occur.
+ """
+ buf = fp.read(self.BUFFER_SIZE)
+ if cb:
+ # The cb_count represents the number of full buffers to send between
+ # cb executions.
+ if num_cb > 2:
+ cb_count = file_length / self.BUFFER_SIZE / (num_cb-2)
+ elif num_cb < 0:
+ cb_count = -1
+ else:
+ cb_count = 0
+ i = 0
+ cb(total_bytes_uploaded, file_length)
+
+ # Build resumable upload headers for the transfer. Don't send a
+ # Content-Range header if the file is 0 bytes long, because the
+ # resumable upload protocol uses an *inclusive* end-range (so, sending
+ # 'bytes 0-0/1' would actually mean you're sending a 1-byte file).
+ put_headers = headers.copy() if headers else {}
+ if file_length:
+ if total_bytes_uploaded == file_length:
+ range_header = self._BuildContentRangeHeader(
+ '*', file_length)
+ else:
+ range_header = self._BuildContentRangeHeader(
+ '%d-%d' % (total_bytes_uploaded, file_length - 1),
+ file_length)
+ put_headers['Content-Range'] = range_header
+ # Set Content-Length to the total bytes we'll send with this PUT.
+ put_headers['Content-Length'] = str(file_length - total_bytes_uploaded)
+ http_request = AWSAuthConnection.build_base_http_request(
+ conn, 'PUT', path=self.upload_url_path, auth_path=None,
+ headers=put_headers, host=self.upload_url_host)
+ http_conn.putrequest('PUT', http_request.path)
+ for k in put_headers:
+ http_conn.putheader(k, put_headers[k])
+ http_conn.endheaders()
+
+ # Turn off debug on http connection so upload content isn't included
+ # in debug stream.
+ http_conn.set_debuglevel(0)
+ while buf:
+ http_conn.send(buf)
+ total_bytes_uploaded += len(buf)
+ if cb:
+ i += 1
+ if i == cb_count or cb_count == -1:
+ cb(total_bytes_uploaded, file_length)
+ i = 0
+ buf = fp.read(self.BUFFER_SIZE)
+ http_conn.set_debuglevel(conn.debug)
+ if cb:
+ cb(total_bytes_uploaded, file_length)
+ if total_bytes_uploaded != file_length:
+ # Abort (and delete the tracker file) so if the user retries
+ # they'll start a new resumable upload rather than potentially
+ # attempting to pick back up later where we left off.
+ raise ResumableUploadException(
+ 'File changed during upload: EOF at %d bytes of %d byte file.' %
+ (total_bytes_uploaded, file_length),
+ ResumableTransferDisposition.ABORT)
+ resp = http_conn.getresponse()
+ # Restore http connection debug level.
+ http_conn.set_debuglevel(conn.debug)
+
+ if resp.status == 200:
+ # Success.
+ return (resp.getheader('etag'),
+ resp.getheader('x-goog-generation'),
+ resp.getheader('x-goog-metageneration'))
+ # Retry timeout (408) and status 500 and 503 errors after a delay.
+ elif resp.status in [408, 500, 503]:
+ disposition = ResumableTransferDisposition.WAIT_BEFORE_RETRY
+ else:
+ # Catch all for any other error codes.
+ disposition = ResumableTransferDisposition.ABORT
+ raise ResumableUploadException('Got response code %d while attempting '
+ 'upload (%s)' %
+ (resp.status, resp.reason), disposition)
+
+ def _AttemptResumableUpload(self, key, fp, file_length, headers, cb,
+ num_cb):
+ """Attempts a resumable upload.
+
+ Args:
+ key: Boto key representing object to upload.
+ fp: File pointer containing upload bytes.
+ file_length: Total length of the upload.
+ headers: Headers to be used in upload requests.
+ cb: Progress callback function that takes (progress, total_size).
+ num_cb: Granularity of the callback (maximum number of times the
+ callback will be called during the file transfer). If negative,
+ perform callback with each buffer read.
+
+ Returns:
+ (etag, generation, metageneration) from service upon success.
+
+ Raises:
+ ResumableUploadException if any problems occur.
+ """
+ (service_start, service_end) = self.SERVICE_HAS_NOTHING
+ conn = key.bucket.connection
+ if self.upload_url:
+ # Try to resume existing resumable upload.
+ try:
+ (service_start, service_end) = (
+ self._QueryServicePos(conn, file_length))
+ self.service_has_bytes = service_start
+ if conn.debug >= 1:
+ self.logger.debug('Resuming transfer.')
+ except ResumableUploadException, e:
+ if conn.debug >= 1:
+ self.logger.debug('Unable to resume transfer (%s).', e.message)
+ self._StartNewResumableUpload(key, headers)
+ else:
+ self._StartNewResumableUpload(key, headers)
+
+ # upload_start_point allows the code that instantiated the
+ # ResumableUploadHandler to find out the point from which it started
+ # uploading (e.g., so it can correctly compute throughput).
+ if self.upload_start_point is None:
+ self.upload_start_point = service_end
+
+ total_bytes_uploaded = service_end + 1
+
+ # Start reading from the file based upon the number of bytes that the
+ # server has so far.
+ if total_bytes_uploaded < file_length:
+ fp.seek(total_bytes_uploaded)
+
+ conn = key.bucket.connection
+
+ # Get a new HTTP connection (vs conn.get_http_connection(), which reuses
+ # pool connections) because httplib requires a new HTTP connection per
+ # transaction. (Without this, calling http_conn.getresponse() would get
+ # "ResponseNotReady".)
+ http_conn = conn.new_http_connection(self.upload_url_host, conn.port,
+ conn.is_secure)
+ http_conn.set_debuglevel(conn.debug)
+
+ # Make sure to close http_conn at end so if a local file read
+ # failure occurs partway through service will terminate current upload
+ # and can report that progress on next attempt.
+ try:
+ return self._UploadFileBytes(conn, http_conn, fp, file_length,
+ total_bytes_uploaded, cb, num_cb,
+ headers)
+ except (ResumableUploadException, socket.error):
+ resp = self._QueryServiceState(conn, file_length)
+ if resp.status == 400:
+ raise ResumableUploadException(
+ 'Got 400 response from service state query after failed resumable '
+ 'upload attempt. This can happen for various reasons, including '
+ 'specifying an invalid request (e.g., an invalid canned ACL) or '
+ 'if the file size changed between upload attempts',
+ ResumableTransferDisposition.ABORT)
+ else:
+ raise
+ finally:
+ http_conn.close()
+
+ def HandleResumableUploadException(self, e, debug):
+ if e.disposition == ResumableTransferDisposition.ABORT_CUR_PROCESS:
+ if debug >= 1:
+ self.logger.debug('Caught non-retryable ResumableUploadException (%s); '
+ 'aborting but retaining tracker file', e.message)
+ raise
+ elif e.disposition == ResumableTransferDisposition.ABORT:
+ if debug >= 1:
+ self.logger.debug('Caught non-retryable ResumableUploadException (%s); '
+ 'aborting and removing tracker file', e.message)
+ raise
+ else:
+ if debug >= 1:
+ self.logger.debug(
+ 'Caught ResumableUploadException (%s) - will retry', e.message)
+
+ def TrackProgressLessIterations(self, service_had_bytes_before_attempt,
+ debug=0):
+ """Tracks the number of iterations without progress.
+
+ Performs randomized exponential backoff.
+
+ Args:
+ service_had_bytes_before_attempt: Number of bytes the service had prior
+ to this upload attempt.
+ debug: debug level 0..3
+ """
+ # At this point we had a re-tryable failure; see if we made progress.
+ if self.service_has_bytes > service_had_bytes_before_attempt:
+ self.progress_less_iterations = 0 # If progress, reset counter.
+ else:
+ self.progress_less_iterations += 1
+
+ if self.progress_less_iterations > self.num_retries:
+ # Don't retry any longer in the current process.
+ raise ResumableUploadException(
+ 'Too many resumable upload attempts failed without '
+ 'progress. You might try this upload again later',
+ ResumableTransferDisposition.ABORT_CUR_PROCESS)
+
+ # Use binary exponential backoff to desynchronize client requests.
+ sleep_time_secs = min(random.random() * (2**self.progress_less_iterations),
+ GetMaxRetryDelay())
+ if debug >= 1:
+ self.logger.debug('Got retryable failure (%d progress-less in a row).\n'
+ 'Sleeping %3.1f seconds before re-trying',
+ self.progress_less_iterations, sleep_time_secs)
+ time.sleep(sleep_time_secs)
+
+ def SendFile(self, key, fp, size, headers, canned_acl=None, cb=None,
+ num_cb=XML_PROGRESS_CALLBACKS):
+ """Upload a file to a key into a bucket on GS, resumable upload
protocol.
+
+ Args:
+ key: `boto.s3.key.Key` or subclass representing the upload destination.
+ fp: File pointer to upload
+ size: Size of the file to upload.
+ headers: The headers to pass along with the PUT request
+ canned_acl: Optional canned ACL to apply to object.
+ cb: Callback function that will be called to report progress on
+ the upload. The callback should accept two integer parameters, the
+ first representing the number of bytes that have been successfully
+ transmitted to GS, and the second representing the total number of
+ bytes that need to be transmitted.
+ num_cb: (optional) If a callback is specified with the cb parameter, this
+ parameter determines the granularity of the callback by defining
+ the maximum number of times the callback will be called during the
+ file transfer. Providing a negative integer will cause your
+ callback to be called with each buffer read.
+
+ Raises:
+ ResumableUploadException if a problem occurs during the transfer.
+ """
+
+ if not headers:
+ headers = {}
+ # If Content-Type header is present and set to None, remove it.
+ # This is gsutil's way of asking boto to refrain from auto-generating
+ # that header.
+ content_type = 'Content-Type'
+ if content_type in headers and headers[content_type] is None:
+ del headers[content_type]
+
+ if canned_acl:
+ headers[key.provider.acl_header] = canned_acl
+
+ headers['User-Agent'] = UserAgent
+
+ file_length = size
+ debug = key.bucket.connection.debug
+
+ # Use num-retries from constructor if one was provided; else check
+ # for a value specified in the boto config file; else default to 5.
+ if self.num_retries is None:
+ self.num_retries = GetNumRetries()
+ self.progress_less_iterations = 0
+
+ while True: # Retry as long as we're making progress.
+ service_had_bytes_before_attempt = self.service_has_bytes
+ try:
+ # Save generation and metageneration in class state so caller
+ # can find these values, for use in preconditions of future
+ # operations on the uploaded object.
+ (_, self.generation, self.metageneration) = (
+ self._AttemptResumableUpload(key, fp, file_length,
+ headers, cb, num_cb))
+
+ key.generation = self.generation
+ if debug >= 1:
+ self.logger.debug('Resumable upload complete.')
+ return
+ except self.RETRYABLE_EXCEPTIONS, e:
+ if debug >= 1:
+ self.logger.debug('Caught exception (%s)', e.__repr__())
+ if isinstance(e, IOError) and e.errno == errno.EPIPE:
+ # Broken pipe error causes httplib to immediately
+ # close the socket (http://bugs.python.org/issue5542),
+ # so we need to close the connection before we resume
+ # the upload (which will cause a new connection to be
+ # opened the next time an HTTP request is sent).
+ key.bucket.connection.connection.close()
+ except ResumableUploadException, e:
+ self.HandleResumableUploadException(e, debug)
+
+ self.TrackProgressLessIterations(service_had_bytes_before_attempt,
+ debug=debug)
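
A minimal usage sketch of the BotoResumableUpload class added above, driven
through its public SendFile method. The tracker path and logger name are
illustrative, and 'key' is assumed to be an existing boto GS Key for the
destination object; none of this is part of the commit itself:

    import logging
    import os

    from gslib.boto_resumable_upload import BotoResumableUpload

    def _save_tracker_url(url, tracker_path='/tmp/upload.tracker'):
        # Persist the resumable upload URL so an interrupted transfer can
        # later be resumed via the resume_url constructor argument.
        with open(tracker_path, 'w') as f:
            f.write(url)

    def resumable_upload(key, local_path):
        uploader = BotoResumableUpload(_save_tracker_url,
                                       logging.getLogger('resumable_upload'))
        with open(local_path, 'rb') as fp:
            uploader.SendFile(key, fp, os.path.getsize(local_path), headers={})
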
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/boto_translation.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,1567 @@
+# -*- coding: utf-8 -*-
+# Copyright 2013 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""XML/boto gsutil Cloud API implementation for GCS and Amazon S3."""
+
+from __future__ import absolute_import
+
+import base64
+import binascii
+import datetime
+import errno
+import httplib
+import json
+import multiprocessing
+import os
+import pickle
+import random
+import re
+import socket
+import tempfile
+import textwrap
+import time
+import xml
+from xml.dom.minidom import parseString as XmlParseString
+from xml.sax import _exceptions as SaxExceptions
+
+import boto
+from boto import handler
+from boto.exception import ResumableDownloadException as BotoResumableDownloadException
+from boto.exception import ResumableTransferDisposition
+from boto.gs.cors import Cors
+from boto.gs.lifecycle import LifecycleConfig
+from boto.s3.deletemarker import DeleteMarker
+from boto.s3.prefix import Prefix
+
+from gslib.boto_resumable_upload import BotoResumableUpload
+from gslib.cloud_api import AccessDeniedException
+from gslib.cloud_api import ArgumentException
+from gslib.cloud_api import BadRequestException
+from gslib.cloud_api import CloudApi
+from gslib.cloud_api import NotEmptyException
+from gslib.cloud_api import NotFoundException
+from gslib.cloud_api import PreconditionException
+from gslib.cloud_api import ResumableDownloadException
+from gslib.cloud_api import ResumableUploadAbortException
+from gslib.cloud_api import ResumableUploadException
+from gslib.cloud_api import ServiceException
+from gslib.cloud_api_helper import ValidateDstObjectMetadata
+from gslib.exception import CommandException
+from gslib.exception import InvalidUrlError
+from gslib.project_id import GOOG_PROJ_ID_HDR
+from gslib.project_id import PopulateProjectId
+from gslib.storage_url import StorageUrlFromString
+from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
+from gslib.translation_helper import AclTranslation
+from gslib.translation_helper import AddS3MarkerAclToObjectMetadata
+from gslib.translation_helper import CorsTranslation
+from gslib.translation_helper import CreateBucketNotFoundException
+from gslib.translation_helper import CreateObjectNotFoundException
+from gslib.translation_helper import DEFAULT_CONTENT_TYPE
+from gslib.translation_helper import EncodeStringAsLong
+from gslib.translation_helper import GenerationFromUrlAndString
+from gslib.translation_helper import HeadersFromObjectMetadata
+from gslib.translation_helper import LifecycleTranslation
+from gslib.translation_helper import REMOVE_CORS_CONFIG
+from gslib.translation_helper import S3MarkerAclFromObjectMetadata
+from gslib.util import ConfigureNoOpAuthIfNeeded
+from gslib.util import DEFAULT_FILE_BUFFER_SIZE
+from gslib.util import GetFileSize
+from gslib.util import GetMaxRetryDelay
+from gslib.util import GetNumRetries
+from gslib.util import MultiprocessingIsAvailable
+from gslib.util import S3_DELETE_MARKER_GUID
+from gslib.util import TWO_MB
+from gslib.util import UnaryDictToXml
+from gslib.util import UTF8
+from gslib.util import XML_PROGRESS_CALLBACKS
+
+TRANSLATABLE_BOTO_EXCEPTIONS = (boto.exception.BotoServerError,
+ boto.exception.InvalidUriError,
+ boto.exception.ResumableDownloadException,
+ boto.exception.ResumableUploadException,
+ boto.exception.StorageCreateError,
+ boto.exception.StorageResponseError)
+
+# If multiprocessing is available, this will be overridden to a (thread-safe)
+# multiprocessing.Value in a call to InitializeMultiprocessingVariables.
+boto_auth_initialized = False
+
+NON_EXISTENT_OBJECT_REGEX = re.compile(r'.*non-\s*existent\s*object',
+ flags=re.DOTALL)
+# Determines whether an etag is a valid MD5.
+MD5_REGEX = re.compile(r'^"*[a-fA-F0-9]{32}"*$')
+
+
+def InitializeMultiprocessingVariables():
+ """Perform necessary initialization for multiprocessing.
+
+ See gslib.command.InitializeMultiprocessingVariables for an explanation
+ of why this is necessary.
+ """
+ global boto_auth_initialized # pylint: disable=global-variable-undefined
+ boto_auth_initialized = multiprocessing.Value('i', 0)
+
+
+class BotoTranslation(CloudApi):
+ """Boto-based XML translation implementation of gsutil Cloud API.
+
+ This class takes gsutil Cloud API objects, translates them to XML service
+ calls, and translates the results back into gsutil Cloud API objects for
+ use by the caller.
+ """
+
+ def __init__(self, bucket_storage_uri_class, logger, provider=None,
+ credentials=None, debug=0):
+    """Performs necessary setup for interacting with the cloud storage provider.
+
+ Args:
+ bucket_storage_uri_class: boto storage_uri class, used by APIs that
+ provide boto translation or mocking.
+ logger: logging.logger for outputting log messages.
+      provider: Provider prefix describing cloud storage provider to connect to.
+                'gs' and 's3' are supported. Function implementations ignore
+                the provider argument and use this one instead.
+ credentials: Unused.
+ debug: Debug level for the API implementation (0..3).
+ """
+ super(BotoTranslation, self).__init__(bucket_storage_uri_class, logger,
+ provider=provider, debug=debug)
+ _ = credentials
+    global boto_auth_initialized  # pylint: disable=global-variable-undefined
+ if MultiprocessingIsAvailable()[0] and not boto_auth_initialized.value:
+ ConfigureNoOpAuthIfNeeded()
+ boto_auth_initialized.value = 1
+ elif not boto_auth_initialized:
+ ConfigureNoOpAuthIfNeeded()
+ boto_auth_initialized = True
+ self.api_version = boto.config.get_value(
+ 'GSUtil', 'default_api_version', '1')
+
+ def GetBucket(self, bucket_name, provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ _ = provider
+ bucket_uri = self._StorageUriForBucket(bucket_name)
+ headers = {}
+ self._AddApiVersionToHeaders(headers)
+ try:
+      return self._BotoBucketToBucket(bucket_uri.get_bucket(validate=True,
+                                                             headers=headers),
+ fields=fields)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
+
+ def ListBuckets(self, project_id=None, provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ _ = provider
+ get_fields = self._ListToGetFields(list_fields=fields)
+ headers = {}
+ self._AddApiVersionToHeaders(headers)
+ if self.provider == 'gs':
+ headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
+ try:
+ provider_uri = boto.storage_uri(
+ '%s://' % self.provider,
+ suppress_consec_slashes=False,
+ bucket_storage_uri_class=self.bucket_storage_uri_class,
+ debug=self.debug)
+
+ buckets_iter = provider_uri.get_all_buckets(headers=headers)
+ for bucket in buckets_iter:
+ if self.provider == 's3' and bucket.name.lower() != bucket.name:
+ # S3 listings can return buckets with upper-case names, but boto
+ # can't successfully call them.
+ continue
+ yield self._BotoBucketToBucket(bucket, fields=get_fields)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e)
+
+ def PatchBucket(self, bucket_name, metadata, preconditions=None,
+ provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ _ = provider
+ bucket_uri = self._StorageUriForBucket(bucket_name)
+ headers = {}
+ self._AddApiVersionToHeaders(headers)
+ try:
+ self._AddPreconditionsToHeaders(preconditions, headers)
+ if metadata.acl:
+ boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
+ bucket_uri.set_xml_acl(boto_acl.to_xml(), headers=headers)
+ if metadata.cors:
+ if metadata.cors == REMOVE_CORS_CONFIG:
+ metadata.cors = []
+ boto_cors = CorsTranslation.BotoCorsFromMessage(metadata.cors)
+ bucket_uri.set_cors(boto_cors, False)
+ if metadata.defaultObjectAcl:
+ boto_acl = AclTranslation.BotoAclFromMessage(
+ metadata.defaultObjectAcl)
+ bucket_uri.set_def_xml_acl(boto_acl.to_xml(), headers=headers)
+ if metadata.lifecycle:
+ boto_lifecycle = LifecycleTranslation.BotoLifecycleFromMessage(
+ metadata.lifecycle)
+ bucket_uri.configure_lifecycle(boto_lifecycle, False)
+ if metadata.logging:
+ if self.provider == 'gs':
+ headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(None)
+ if metadata.logging.logBucket and metadata.logging.logObjectPrefix:
+ bucket_uri.enable_logging(metadata.logging.logBucket,
+ metadata.logging.logObjectPrefix,
+ False, headers)
+ else: # Logging field is present and empty. Disable logging.
+ bucket_uri.disable_logging(False, headers)
+ if metadata.versioning:
+ bucket_uri.configure_versioning(metadata.versioning.enabled,
+ headers=headers)
+ if metadata.website:
+ main_page_suffix = metadata.website.mainPageSuffix
+ error_page = metadata.website.notFoundPage
+ bucket_uri.set_website_config(main_page_suffix, error_page)
+ return self.GetBucket(bucket_name, fields=fields)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
+
+ def CreateBucket(self, bucket_name, project_id=None, metadata=None,
+ provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ _ = provider
+ bucket_uri = self._StorageUriForBucket(bucket_name)
+ location = ''
+ if metadata and metadata.location:
+ location = metadata.location
+ # Pass storage_class param only if this is a GCS bucket. (In S3 the
+ # storage class is specified on the key object.)
+ headers = {}
+ if bucket_uri.scheme == 'gs':
+ self._AddApiVersionToHeaders(headers)
+ headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
+ storage_class = ''
+ if metadata and metadata.storageClass:
+ storage_class = metadata.storageClass
+ try:
+ bucket_uri.create_bucket(headers=headers, location=location,
+ storage_class=storage_class)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
+ else:
+ try:
+ bucket_uri.create_bucket(headers=headers, location=location)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
+ return self.GetBucket(bucket_name, fields=fields)
+
+ def DeleteBucket(self, bucket_name, preconditions=None, provider=None):
+ """See CloudApi class for function doc strings."""
+ _ = provider, preconditions
+ bucket_uri = self._StorageUriForBucket(bucket_name)
+ headers = {}
+ self._AddApiVersionToHeaders(headers)
+ try:
+ bucket_uri.delete_bucket(headers=headers)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ translated_exception = self._TranslateBotoException(
+ e, bucket_name=bucket_name)
+ if (translated_exception and
+ 'BucketNotEmpty' in translated_exception.reason):
+ try:
+ if bucket_uri.get_versioning_config():
+ if self.provider == 's3':
+ raise NotEmptyException(
+                  'VersionedBucketNotEmpty (%s). Currently, gsutil does not '
+                  'support listing or removing S3 DeleteMarkers, so you may '
+                  'need to delete these using another tool to successfully '
+                  'delete this bucket.' % bucket_name, status=e.status)
+            raise NotEmptyException(
+                'VersionedBucketNotEmpty (%s)' % bucket_name, status=e.status)
+ else:
+ raise NotEmptyException('BucketNotEmpty (%s)' % bucket_name,
+ status=e.status)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e2:
+ self._TranslateExceptionAndRaise(e2, bucket_name=bucket_name)
+ elif translated_exception and translated_exception.status == 404:
+ raise NotFoundException('Bucket %s does not exist.' % bucket_name)
+ else:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
+
+ def ListObjects(self, bucket_name, prefix=None, delimiter=None,
+ all_versions=None, provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ _ = provider
+ get_fields = self._ListToGetFields(list_fields=fields)
+ bucket_uri = self._StorageUriForBucket(bucket_name)
+ prefix_list = []
+ headers = {}
+ self._AddApiVersionToHeaders(headers)
+ try:
+ objects_iter = bucket_uri.list_bucket(prefix=prefix or '',
+ delimiter=delimiter or '',
+ all_versions=all_versions,
+ headers=headers)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
+
+ try:
+ for key in objects_iter:
+ if isinstance(key, Prefix):
+ prefix_list.append(key.name)
+            yield CloudApi.CsObjectOrPrefix(key.name,
+                                            CloudApi.CsObjectOrPrefixType.PREFIX)
+ else:
+ key_to_convert = key
+
+          # Listed keys are populated with these fields during bucket listing.
+          key_http_fields = set(['bucket', 'etag', 'name', 'updated',
+                                 'generation', 'metageneration', 'size'])
+
+          # When fields == None, the caller is requesting all possible fields.
+          # If the caller requested any fields that are not populated by bucket
+          # listing, we'll need to make a separate HTTP call for each object to
+          # get its metadata and populate the remaining fields with the result.
+ if not get_fields or (get_fields and not
+ get_fields.issubset(key_http_fields)):
+
+ generation = None
+ if getattr(key, 'generation', None):
+ generation = key.generation
+ if getattr(key, 'version_id', None):
+ generation = key.version_id
+ key_to_convert = self._GetBotoKey(bucket_name, key.name,
+ generation=generation)
+ return_object = self._BotoKeyToObject(key_to_convert,
+ fields=get_fields)
+
+          yield CloudApi.CsObjectOrPrefix(return_object,
+                                          CloudApi.CsObjectOrPrefixType.OBJECT)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
+
+ def GetObjectMetadata(self, bucket_name, object_name, generation=None,
+ provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ _ = provider
+ try:
+      return self._BotoKeyToObject(self._GetBotoKey(bucket_name, object_name,
+ generation=generation),
+ fields=fields)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
+ object_name=object_name,
+ generation=generation)
+
+ def _CurryDigester(self, digester_object):
+ """Curries a digester object into a form consumable by boto.
+
+    Key instantiates its own digesters by calling hash_algs[alg]() [note there
+    are no arguments to this function]. So in order to pass in our caught-up
+    digesters during a resumable download, we need to pass the digester
+    object but don't get to look it up based on the algorithm name. Here we
+    use a lambda to make lookup implicit.
+
+ Args:
+ digester_object: Input object to be returned by the created function.
+
+ Returns:
+ A function which when called will return the input object.
+ """
+ return lambda: digester_object
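As the docstring notes, boto looks up hash_algs[alg] and calls it with no arguments, so a digester that has already been caught up must be wrapped in a zero-argument callable. A minimal illustration using only hashlib; the names below are illustrative and not part of this change:

    import hashlib

    caught_up = hashlib.md5()
    caught_up.update(b'bytes already downloaded before the resume')

    # boto calls hash_algs['md5']() expecting a fresh digester; the lambda
    # hands back the already-primed one instead.
    hash_algs = {'md5': lambda: caught_up}
    assert hash_algs['md5']() is caught_up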
+
+ def GetObjectMedia(
+ self, bucket_name, object_name, download_stream, provider=None,
+ generation=None, object_size=None,
+ download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
+ start_byte=0, end_byte=None, progress_callback=None,
+ serialization_data=None, digesters=None):
+ """See CloudApi class for function doc strings."""
+    # This implementation will get the object metadata first if we don't pass it
+    # in via serialization_data.
+ headers = {}
+ self._AddApiVersionToHeaders(headers)
+ if 'accept-encoding' not in headers:
+ headers['accept-encoding'] = 'gzip'
+ if end_byte:
+ headers['range'] = 'bytes=%s-%s' % (start_byte, end_byte)
+ elif start_byte > 0:
+ headers['range'] = 'bytes=%s-' % (start_byte)
+ else:
+ headers['range'] = 'bytes=%s' % (start_byte)
+
+    # Since in most cases we already made a call to get the object metadata,
+    # here we avoid an extra HTTP call by unpickling the key. This is coupled
+    # with the implementation in _BotoKeyToObject.
+ if serialization_data:
+ serialization_dict = json.loads(serialization_data)
+ key = pickle.loads(binascii.a2b_base64(serialization_dict['url']))
+ else:
+      key = self._GetBotoKey(bucket_name, object_name, generation=generation)
+
+ if digesters and self.provider == 'gs':
+ hash_algs = {}
+ for alg in digesters:
+ hash_algs[alg] = self._CurryDigester(digesters[alg])
+ else:
+ hash_algs = {}
+
+ total_size = object_size or 0
+ if serialization_data:
+ total_size = json.loads(serialization_data)['total_size']
+
+ if download_strategy is CloudApi.DownloadStrategy.RESUMABLE:
+ try:
+ if total_size:
+ num_progress_callbacks = max(int(total_size) / TWO_MB,
+ XML_PROGRESS_CALLBACKS)
+ else:
+ num_progress_callbacks = XML_PROGRESS_CALLBACKS
+ self._PerformResumableDownload(
+            download_stream, key, headers=headers, callback=progress_callback,
+ num_callbacks=num_progress_callbacks, hash_algs=hash_algs)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
+ object_name=object_name,
+ generation=generation)
+ elif download_strategy is CloudApi.DownloadStrategy.ONE_SHOT:
+ try:
+ self._PerformSimpleDownload(download_stream, key, headers=headers,
+ hash_algs=hash_algs)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
+ object_name=object_name,
+ generation=generation)
+ else:
+ raise ArgumentException('Unsupported DownloadStrategy: %s' %
+ download_strategy)
+
+ if self.provider == 's3':
+ if digesters:
+
+ class HashToDigester(object):
+ """Wrapper class to expose hash digests.
+
+          boto creates its own digesters in s3's get_file, returning on-the-fly
+          hashes only by way of key.local_hashes. To propagate the digest back
+          to the caller, this stub class implements the digest() function.
+ """
+
+ def __init__(self, hash_val):
+ self.hash_val = hash_val
+
+ def digest(self): # pylint: disable=invalid-name
+ return self.hash_val
+
+ for alg_name in digesters:
+ if ((download_strategy == CloudApi.DownloadStrategy.RESUMABLE and
+ start_byte != 0) or
+ not ((getattr(key, 'local_hashes', None) and
+ alg_name in key.local_hashes))):
+ # For resumable downloads, boto does not provide a mechanism to
+            # catch up the hash in the case of a partially complete download.
+            # In this case or in the case where no digest was successfully
+            # calculated, set the digester to None, which indicates that we'll
+            # need to manually calculate the hash from the local file once it
+            # is complete.
+            digesters[alg_name] = None
+          else:
+            # Use the on-the-fly hash.
+            digesters[alg_name] = HashToDigester(key.local_hashes[alg_name])
+
+ def _PerformSimpleDownload(self, download_stream, key, headers=None,
+ hash_algs=None):
+ if not headers:
+ headers = {}
+ self._AddApiVersionToHeaders(headers)
+ try:
+ key.get_contents_to_file(download_stream, headers=headers,
+ hash_algs=hash_algs)
+ except TypeError: # s3 and mocks do not support hash_algs
+ key.get_contents_to_file(download_stream, headers=headers)
+
+ def _PerformResumableDownload(self, fp, key, headers=None, callback=None,
+ num_callbacks=XML_PROGRESS_CALLBACKS,
+ hash_algs=None):
+ """Downloads bytes from key to fp, resuming as needed.
+
+ Args:
+ fp: File pointer into which data should be downloaded
+ key: Key object from which data is to be downloaded
+ headers: Headers to send when retrieving the file
+      callback: (optional) a callback function that will be called to report
+         progress on the download.  The callback should accept two integer
+         parameters.  The first integer represents the number of
+         bytes that have been successfully transmitted from the service.  The
+         second represents the total number of bytes that need to be
+         transmitted.
+      num_callbacks: (optional) If a callback is specified with the callback
+         parameter, this determines the granularity of the callback
+         by defining the maximum number of times the callback will be
+         called during the file transfer.
+ hash_algs: Dict of hash algorithms to apply to downloaded bytes.
+
+ Raises:
+ ResumableDownloadException on error.
+ """
+ if not headers:
+ headers = {}
+ self._AddApiVersionToHeaders(headers)
+
+ retryable_exceptions = (httplib.HTTPException, IOError, socket.error,
+ socket.gaierror)
+
+ debug = key.bucket.connection.debug
+
+ num_retries = GetNumRetries()
+ progress_less_iterations = 0
+
+ while True: # Retry as long as we're making progress.
+ had_file_bytes_before_attempt = GetFileSize(fp)
+ try:
+ cur_file_size = GetFileSize(fp, position_to_eof=True)
+
+ def DownloadProxyCallback(total_bytes_downloaded, total_size):
+ """Translates a boto callback into a gsutil Cloud API callback.
+
+          Callbacks are originally made by boto.s3.Key.get_file(); here we take
+          into account that we're resuming a download.
+
+ Args:
+ total_bytes_downloaded: Actual bytes downloaded so far, not
+ including the point we resumed from.
+ total_size: Total size of the download.
+ """
+ if callback:
+ callback(cur_file_size + total_bytes_downloaded, total_size)
+
+ headers = headers.copy()
+ headers['Range'] = 'bytes=%d-%d' % (cur_file_size, key.size - 1)
+ cb = DownloadProxyCallback
+
+ # Disable AWSAuthConnection-level retry behavior, since that would
+ # cause downloads to restart from scratch.
+ try:
+          key.get_file(fp, headers, cb, num_callbacks, override_num_retries=0,
+                       hash_algs=hash_algs)
+        except TypeError:
+          key.get_file(fp, headers, cb, num_callbacks, override_num_retries=0)
+ fp.flush()
+ # Download succeeded.
+ return
+ except retryable_exceptions, e:
+ if debug >= 1:
+ self.logger.info('Caught exception (%s)', repr(e))
+ if isinstance(e, IOError) and e.errno == errno.EPIPE:
+ # Broken pipe error causes httplib to immediately
+ # close the socket (http://bugs.python.org/issue5542),
+ # so we need to close and reopen the key before resuming
+ # the download.
+ if self.provider == 's3':
+            key.get_file(fp, headers, cb, num_callbacks,
+                         override_num_retries=0)
+ else: # self.provider == 'gs'
+ key.get_file(fp, headers, cb, num_callbacks,
+ override_num_retries=0, hash_algs=hash_algs)
+ except BotoResumableDownloadException, e:
+ if (e.disposition ==
+ ResumableTransferDisposition.ABORT_CUR_PROCESS):
+ raise ResumableDownloadException(e.message)
+ else:
+ if debug >= 1:
+            self.logger.info('Caught ResumableDownloadException (%s) - will '
+                             'retry', e.message)
+
+      # At this point we had a re-tryable failure; see if we made progress.
+ if GetFileSize(fp) > had_file_bytes_before_attempt:
+ progress_less_iterations = 0
+ else:
+ progress_less_iterations += 1
+
+ if progress_less_iterations > num_retries:
+ # Don't retry any longer in the current process.
+ raise ResumableDownloadException(
+ 'Too many resumable download attempts failed without '
+ 'progress. You might try this download again later')
+
+ # Close the key, in case a previous download died partway
+ # through and left data in the underlying key HTTP buffer.
+ # Do this within a try/except block in case the connection is
+ # closed (since key.close() attempts to do a final read, in which
+ # case this read attempt would get an IncompleteRead exception,
+ # which we can safely ignore).
+ try:
+ key.close()
+ except httplib.IncompleteRead:
+ pass
+
+      sleep_time_secs = min(random.random() * (2 ** progress_less_iterations),
+                            GetMaxRetryDelay())
+      if debug >= 1:
+        self.logger.info(
+            'Got retryable failure (%d progress-less in a row).\nSleeping %d '
+ 'seconds before re-trying', progress_less_iterations,
+ sleep_time_secs)
+ time.sleep(sleep_time_secs)
+
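The retry loop above sleeps for a randomized, exponentially growing interval capped by GetMaxRetryDelay(), which gsutil reads from the boto config. A self-contained sketch of that schedule, assuming a 32-second cap purely for illustration:

    import random

    MAX_RETRY_DELAY = 32  # assumed cap; the real value comes from the config

    def BackoffSecs(progress_less_iterations):
      # Randomized exponential backoff, capped at MAX_RETRY_DELAY seconds.
      return min(random.random() * (2 ** progress_less_iterations),
                 MAX_RETRY_DELAY)

    # e.g. the first progress-less retry sleeps up to 2s, the fifth up to 32s.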
+ def PatchObjectMetadata(self, bucket_name, object_name, metadata,
+                          generation=None, preconditions=None, provider=None,
+ fields=None):
+ """See CloudApi class for function doc strings."""
+ _ = provider
+ object_uri = self._StorageUriForObject(bucket_name, object_name,
+ generation=generation)
+
+ headers = {}
+ self._AddApiVersionToHeaders(headers)
+ meta_headers = HeadersFromObjectMetadata(metadata, self.provider)
+
+ metadata_plus = {}
+ metadata_minus = set()
+ metadata_changed = False
+ for k, v in meta_headers.iteritems():
+ metadata_changed = True
+ if v is None:
+ metadata_minus.add(k)
+ else:
+ metadata_plus[k] = v
+
+ self._AddPreconditionsToHeaders(preconditions, headers)
+
+ if metadata_changed:
+ try:
+ object_uri.set_metadata(metadata_plus, metadata_minus, False,
+ headers=headers)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
+ object_name=object_name,
+ generation=generation)
+
+ if metadata.acl:
+ boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
+ try:
+ object_uri.set_xml_acl(boto_acl.to_xml(), key_name=object_name)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
+ object_name=object_name,
+ generation=generation)
+ return self.GetObjectMetadata(bucket_name, object_name,
+ generation=generation, fields=fields)
+
+ def _PerformSimpleUpload(self, dst_uri, upload_stream, md5=None,
+ canned_acl=None, progress_callback=None,
+ headers=None):
+    dst_uri.set_contents_from_file(upload_stream, md5=md5, policy=canned_acl,
+ cb=progress_callback, headers=headers)
+
+  def _PerformStreamingUpload(self, dst_uri, upload_stream, canned_acl=None,
+ progress_callback=None, headers=None):
+ if dst_uri.get_provider().supports_chunked_transfer():
+ dst_uri.set_contents_from_stream(upload_stream, policy=canned_acl,
+                                       cb=progress_callback, headers=headers)
+ else:
+ # Provider doesn't support chunked transfer, so copy to a temporary
+ # file.
+ (temp_fh, temp_path) = tempfile.mkstemp()
+ try:
+ with open(temp_path, 'wb') as out_fp:
+ stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
+ while stream_bytes:
+ out_fp.write(stream_bytes)
+ stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
+ with open(temp_path, 'rb') as in_fp:
+ dst_uri.set_contents_from_file(in_fp, policy=canned_acl,
+ headers=headers)
+ finally:
+ os.close(temp_fh)
+ os.unlink(temp_path)
+
+ def _PerformResumableUpload(self, key, upload_stream, upload_size,
+ tracker_callback, canned_acl=None,
+                              serialization_data=None, progress_callback=None,
+ headers=None):
+ resumable_upload = BotoResumableUpload(
+ tracker_callback, self.logger, resume_url=serialization_data)
+ resumable_upload.SendFile(key, upload_stream, upload_size,
+ canned_acl=canned_acl, cb=progress_callback,
+ headers=headers)
+
+ def _UploadSetup(self, object_metadata, preconditions=None):
+ """Shared upload implementation.
+
+ Args:
+ object_metadata: Object metadata describing destination object.
+ preconditions: Optional gsutil Cloud API preconditions.
+
+ Returns:
+ Headers dictionary, StorageUri for upload (based on inputs)
+ """
+ ValidateDstObjectMetadata(object_metadata)
+
+ headers = HeadersFromObjectMetadata(object_metadata, self.provider)
+ self._AddApiVersionToHeaders(headers)
+
+ if object_metadata.crc32c:
+ if 'x-goog-hash' in headers:
+ headers['x-goog-hash'] += (
+ ',crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
+ else:
+ headers['x-goog-hash'] = (
+ 'crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
+ if object_metadata.md5Hash:
+ if 'x-goog-hash' in headers:
+ headers['x-goog-hash'] += (
+ ',md5=%s' % object_metadata.md5Hash.rstrip('\n'))
+ else:
+ headers['x-goog-hash'] = (
+ 'md5=%s' % object_metadata.md5Hash.rstrip('\n'))
+
+ if 'content-type' in headers and not headers['content-type']:
+ headers['content-type'] = 'application/octet-stream'
+
+ self._AddPreconditionsToHeaders(preconditions, headers)
+
+ dst_uri = self._StorageUriForObject(object_metadata.bucket,
+ object_metadata.name)
+ return headers, dst_uri
+
+ def _HandleSuccessfulUpload(self, dst_uri, object_metadata, fields=None):
+ """Set ACLs on an uploaded object and return its metadata.
+
+ Args:
+ dst_uri: Generation-specific StorageUri describing the object.
+      object_metadata: Metadata for the object, including an ACL if applicable.
+ fields: If present, return only these Object metadata fields.
+
+ Returns:
+ gsutil Cloud API Object metadata.
+
+ Raises:
+      CommandException if the object was overwritten / deleted concurrently.
+ """
+ try:
+ # The XML API does not support if-generation-match for GET requests.
+      # Therefore, if the object gets overwritten before the ACL and get_key
+      # operations, the best we can do is warn that it happened.
+ self._SetObjectAcl(object_metadata, dst_uri)
+ return self._BotoKeyToObject(dst_uri.get_key(), fields=fields)
+ except boto.exception.InvalidUriError as e:
+ check_for_str = 'Attempt to get key for "%s" failed.' % dst_uri.uri
+ if check_for_str in e.message:
+ raise CommandException('\n'.join(textwrap.wrap(
+ 'Uploaded object (%s) was deleted or overwritten immediately '
+            'after it was uploaded. This can happen if you attempt to upload '
+            'to the same object multiple times concurrently.' % dst_uri.uri)))
+ else:
+ raise
+
+ def _SetObjectAcl(self, object_metadata, dst_uri):
+    """Sets the ACL (if present in object_metadata) on an uploaded object."""
+ if object_metadata.acl:
+ boto_acl = AclTranslation.BotoAclFromMessage(object_metadata.acl)
+ dst_uri.set_xml_acl(boto_acl.to_xml())
+ elif self.provider == 's3':
+ s3_acl = S3MarkerAclFromObjectMetadata(object_metadata)
+ if s3_acl:
+ dst_uri.set_xml_acl(s3_acl)
+
+ def UploadObjectResumable(
+      self, upload_stream, object_metadata, canned_acl=None, preconditions=None,
+ provider=None, fields=None, size=None, serialization_data=None,
+ tracker_callback=None, progress_callback=None):
+ """See CloudApi class for function doc strings."""
+ if self.provider == 's3':
+ # Resumable uploads are not supported for s3.
+ return self.UploadObject(
+ upload_stream, object_metadata, canned_acl=canned_acl,
+ preconditions=preconditions, fields=fields, size=size)
+ headers, dst_uri = self._UploadSetup(object_metadata,
+ preconditions=preconditions)
+ if not tracker_callback:
+ raise ArgumentException('No tracker callback function set for '
+ 'resumable upload of %s' % dst_uri)
+ try:
+ self._PerformResumableUpload(dst_uri.new_key(headers=headers),
+ upload_stream, size, tracker_callback,
+ canned_acl=canned_acl,
+ serialization_data=serialization_data,
+ progress_callback=progress_callback,
+ headers=headers)
+ return self._HandleSuccessfulUpload(dst_uri, object_metadata,
+ fields=fields)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
+ object_name=object_metadata.name)
+
+ def UploadObjectStreaming(self, upload_stream, object_metadata,
+ canned_acl=None, progress_callback=None,
+                            preconditions=None, provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ headers, dst_uri = self._UploadSetup(object_metadata,
+ preconditions=preconditions)
+
+ try:
+ self._PerformStreamingUpload(
+ dst_uri, upload_stream, canned_acl=canned_acl,
+ progress_callback=progress_callback, headers=headers)
+ return self._HandleSuccessfulUpload(dst_uri, object_metadata,
+ fields=fields)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
+ object_name=object_metadata.name)
+
+ def UploadObject(self, upload_stream, object_metadata, canned_acl=None,
+ preconditions=None, size=None, progress_callback=None,
+ provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ headers, dst_uri = self._UploadSetup(object_metadata,
+ preconditions=preconditions)
+
+ try:
+ md5 = None
+ if object_metadata.md5Hash:
+ md5 = []
+ # boto expects hex at index 0, base64 at index 1
+ md5.append(binascii.hexlify(
+ base64.decodestring(object_metadata.md5Hash.strip('\n"\''))))
+ md5.append(object_metadata.md5Hash.strip('\n"\''))
+ self._PerformSimpleUpload(dst_uri, upload_stream, md5=md5,
+ canned_acl=canned_acl,
+ progress_callback=progress_callback,
+ headers=headers)
+ return self._HandleSuccessfulUpload(dst_uri, object_metadata,
+ fields=fields)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
+ object_name=object_metadata.name)
+
+ def DeleteObject(self, bucket_name, object_name, preconditions=None,
+ generation=None, provider=None):
+ """See CloudApi class for function doc strings."""
+ _ = provider
+ headers = {}
+ self._AddApiVersionToHeaders(headers)
+ self._AddPreconditionsToHeaders(preconditions, headers)
+
+ uri = self._StorageUriForObject(bucket_name, object_name,
+ generation=generation)
+ try:
+ uri.delete_key(validate=False, headers=headers)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
+ object_name=object_name,
+ generation=generation)
+
+ def CopyObject(self, src_bucket_name, src_obj_name, dst_obj_metadata,
+ src_generation=None, canned_acl=None, preconditions=None,
+ provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ _ = provider
+ dst_uri = self._StorageUriForObject(dst_obj_metadata.bucket,
+ dst_obj_metadata.name)
+
+ # Usually it's okay to treat version_id and generation as
+ # the same, but in this case the underlying boto call determines the
+ # provider based on the presence of one or the other.
+ src_version_id = None
+ if self.provider == 's3':
+ src_version_id = src_generation
+ src_generation = None
+
+ headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
+ self._AddApiVersionToHeaders(headers)
+ self._AddPreconditionsToHeaders(preconditions, headers)
+
+ if canned_acl:
+ headers[dst_uri.get_provider().acl_header] = canned_acl
+
+ preserve_acl = True if dst_obj_metadata.acl else False
+ if self.provider == 's3':
+ s3_acl = S3MarkerAclFromObjectMetadata(dst_obj_metadata)
+ if s3_acl:
+ preserve_acl = True
+
+ try:
+ new_key = dst_uri.copy_key(
+ src_bucket_name, src_obj_name, preserve_acl=preserve_acl,
+ headers=headers, src_version_id=src_version_id,
+ src_generation=src_generation)
+
+ return self._BotoKeyToObject(new_key, fields=fields)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, dst_obj_metadata.bucket,
+ dst_obj_metadata.name)
+
+ def ComposeObject(self, src_objs_metadata, dst_obj_metadata,
+ preconditions=None, provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ _ = provider
+ ValidateDstObjectMetadata(dst_obj_metadata)
+
+ dst_obj_name = dst_obj_metadata.name
+ dst_obj_metadata.name = None
+ dst_bucket_name = dst_obj_metadata.bucket
+ dst_obj_metadata.bucket = None
+ headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
+ if not dst_obj_metadata.contentType:
+ dst_obj_metadata.contentType = DEFAULT_CONTENT_TYPE
+ headers['content-type'] = dst_obj_metadata.contentType
+ self._AddApiVersionToHeaders(headers)
+ self._AddPreconditionsToHeaders(preconditions, headers)
+
+ dst_uri = self._StorageUriForObject(dst_bucket_name, dst_obj_name)
+
+ src_components = []
+ for src_obj in src_objs_metadata:
+ src_uri = self._StorageUriForObject(dst_bucket_name, src_obj.name,
+ generation=src_obj.generation)
+ src_components.append(src_uri)
+
+ try:
+ dst_uri.compose(src_components, headers=headers)
+
+ return self.GetObjectMetadata(dst_bucket_name, dst_obj_name,
+ fields=fields)
+ except TRANSLATABLE_BOTO_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, dst_obj_metadata.bucket,
+ dst_obj_metadata.name)
+
+ def _AddPreconditionsToHeaders(self, preconditions, headers):
+ """Adds preconditions (if any) to headers."""
+ if preconditions and self.provider == 'gs':
+ if preconditions.gen_match:
+ headers['x-goog-if-generation-match'] = preconditions.gen_match
+ if preconditions.meta_gen_match:
+        headers['x-goog-if-metageneration-match'] = preconditions.meta_gen_match
+
+ def _AddApiVersionToHeaders(self, headers):
+ if self.provider == 'gs':
+ headers['x-goog-api-version'] = self.api_version
+
+ def _GetMD5FromETag(self, src_etag):
+ """Returns an MD5 from the etag iff the etag is a valid MD5 hash.
+
+ Args:
+ src_etag: Object etag for which to return the MD5.
+
+ Returns:
+ MD5 in hex string format, or None.
+ """
+ if src_etag and MD5_REGEX.search(src_etag):
+ return src_etag.strip('"\'').lower()
+
+ def _StorageUriForBucket(self, bucket):
+ """Returns a boto storage_uri for the given bucket name.
+
+ Args:
+ bucket: Bucket name (string).
+
+ Returns:
+ Boto storage_uri for the bucket.
+ """
+ return boto.storage_uri(
+ '%s://%s' % (self.provider, bucket),
+ suppress_consec_slashes=False,
+ bucket_storage_uri_class=self.bucket_storage_uri_class,
+ debug=self.debug)
+
+ def _StorageUriForObject(self, bucket, object_name, generation=None):
+ """Returns a boto storage_uri for the given object.
+
+ Args:
+ bucket: Bucket name (string).
+ object_name: Object name (string).
+      generation: Generation or version_id of object.  If None, live version
+                  of the object is used.
+
+ Returns:
+ Boto storage_uri for the object.
+ """
+ uri_string = '%s://%s/%s' % (self.provider, bucket, object_name)
+ if generation:
+ uri_string += '#%s' % generation
+ return boto.storage_uri(
+ uri_string, suppress_consec_slashes=False,
+ bucket_storage_uri_class=self.bucket_storage_uri_class,
+ debug=self.debug)
+
+ def _GetBotoKey(self, bucket_name, object_name, generation=None):
+ """Gets the boto key for an object.
+
+ Args:
+ bucket_name: Bucket containing the object.
+ object_name: Object name.
***The diff for this file has been truncated for email.***
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/cat_helper.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Helper for cat and cp streaming download."""
+
+from __future__ import absolute_import
+
+import sys
+
+from gslib.exception import CommandException
+from gslib.wildcard_iterator import StorageUrlFromString
+
+
+class CatHelper(object):
+
+ def __init__(self, command_obj):
+ """Initializes the helper object.
+
+ Args:
+ command_obj: gsutil command instance of calling command.
+ """
+ self.command_obj = command_obj
+
+ def CatUrlStrings(self, url_strings, show_header=False, start_byte=0,
+ end_byte=None):
+ """Prints each of the url strings to stdout.
+
+ Args:
+ url_strings: String iterable.
+ show_header: If true, print a header per file.
+ start_byte: Starting byte of the file to print, used for constructing
+ range requests.
+      end_byte: Ending byte of the file to print; used for constructing range
+                requests. If this is negative, the start_byte is ignored and
+                an end range is sent over HTTP (such as range: bytes -9)
+ Returns:
+ 0 on success.
+
+ Raises:
+ CommandException if no URLs can be found.
+ """
+ printed_one = False
+ # We manipulate the stdout so that all other data other than the Object
+ # contents go to stderr.
+ cat_outfd = sys.stdout
+ sys.stdout = sys.stderr
+ try:
+ for url_str in url_strings:
+ did_some_work = False
+ # TODO: Get only the needed fields here.
+        for blr in self.command_obj.WildcardIterator(url_str).IterObjects():
+ did_some_work = True
+ if show_header:
+ if printed_one:
+ print
+ print '==> %s <==' % blr
+ printed_one = True
+ cat_object = blr.root_object
+ storage_url = StorageUrlFromString(blr.url_string)
+ if storage_url.IsCloudUrl():
+ self.command_obj.gsutil_api.GetObjectMedia(
+ cat_object.bucket, cat_object.name, cat_outfd,
+ start_byte=start_byte, end_byte=end_byte,
+              object_size=cat_object.size, generation=storage_url.generation,
+ provider=storage_url.scheme)
+ else:
+ cat_outfd.write(open(storage_url.object_name, 'rb').read())
+ if not did_some_work:
+ raise CommandException('No URLs matched %s' % url_str)
+ sys.stdout = cat_outfd
+ finally:
+ sys.stdout = cat_outfd
+
+ return 0
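CatUrlStrings temporarily points sys.stdout at stderr so that only object bytes reach the caller's real stdout. A usage sketch from inside a gsutil command; here self stands for the calling Command instance (which supplies WildcardIterator and gsutil_api), and the URL is a placeholder rather than anything from this change:

    from gslib.cat_helper import CatHelper

    # Print every matching object, preceded by a '==> url <==' header line.
    return_code = CatHelper(self).CatUrlStrings(['gs://my-bucket/logs/*'],
                                                show_header=True)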
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/cloud_api.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,611 @@
+# -*- coding: utf-8 -*-
+# Copyright 2013 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Gsutil API for interacting with cloud storage providers."""
+
+from __future__ import absolute_import
+
+
+class CloudApi(object):
+ """Abstract base class for interacting with cloud storage providers.
+
+  Implementations of the gsutil Cloud API are not guaranteed to be thread-safe.
+  Behavior when calling a gsutil Cloud API instance simultaneously across
+  threads is undefined and doing so will likely cause errors. Therefore,
+  a separate instance of the gsutil Cloud API should be instantiated per-thread.
+ """
+
+  def __init__(self, bucket_storage_uri_class, logger, provider=None, debug=0):
+    """Performs necessary setup for interacting with the cloud storage provider.
+
+ Args:
+ bucket_storage_uri_class: boto storage_uri class, used by APIs that
+ provide boto translation or mocking.
+ logger: logging.logger for outputting log messages.
+      provider: Default provider prefix describing cloud storage provider to
+                connect to.
+ debug: Debug level for the API implementation (0..3).
+ """
+ self.bucket_storage_uri_class = bucket_storage_uri_class
+ self.logger = logger
+ self.provider = provider
+ self.debug = debug
+
+ def GetBucket(self, bucket_name, provider=None, fields=None):
+ """Gets Bucket metadata.
+
+ Args:
+ bucket_name: Name of the bucket.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+ fields: If present, return only these Bucket metadata fields, for
+ example, ['logging', 'defaultObjectAcl']
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Bucket object.
+ """
+ raise NotImplementedError('GetBucket must be overloaded')
+
+ def ListBuckets(self, project_id=None, provider=None, fields=None):
+ """Lists bucket metadata for the given project.
+
+ Args:
+ project_id: Project owning the buckets, default from config if None.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+      fields: If present, return only these metadata fields for the listing,
+ for example:
+ ['items/logging', 'items/defaultObjectAcl'].
+ Note that the WildcardIterator class should be used to list
+ buckets instead of calling this function directly. It amends
+ the fields definition from get-like syntax such as
+ ['logging', 'defaultObjectAcl'] so that the caller does not
+              need to prepend 'items/' or specify fields necessary for listing
+ (like nextPageToken).
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Iterator over Bucket objects.
+ """
+ raise NotImplementedError('ListBuckets must be overloaded')
+
+ def PatchBucket(self, bucket_name, metadata, preconditions=None,
+ provider=None, fields=None):
+ """Updates bucket metadata for the bucket with patch semantics.
+
+ Args:
+ bucket_name: Name of bucket to update.
+ metadata: Bucket object defining metadata to be updated.
+ preconditions: Preconditions for the request.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+ fields: If present, return only these Bucket metadata fields.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Bucket object describing new bucket metadata.
+ """
+ raise NotImplementedError('PatchBucket must be overloaded')
+
+ def CreateBucket(self, bucket_name, project_id=None, metadata=None,
+ provider=None, fields=None):
+ """Creates a new bucket with the specified metadata.
+
+ Args:
+ bucket_name: Name of the new bucket.
+      project_id: Project owner of the new bucket, default from config if None.
+ metadata: Bucket object defining new bucket metadata.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+ fields: If present, return only these Bucket metadata fields.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Bucket object describing new bucket metadata.
+ """
+ raise NotImplementedError('CreateBucket must be overloaded')
+
+ def DeleteBucket(self, bucket_name, preconditions=None, provider=None):
+ """Deletes a bucket.
+
+ Args:
+ bucket_name: Name of the bucket to delete.
+ preconditions: Preconditions for the request.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ None.
+ """
+ raise NotImplementedError('DeleteBucket must be overloaded')
+
+ class CsObjectOrPrefixType(object):
+ """Enum class for describing CsObjectOrPrefix types."""
+ OBJECT = 'object' # Cloud object
+ PREFIX = 'prefix' # Cloud bucket subdirectory
+
+ class CsObjectOrPrefix(object):
+ """Container class for ListObjects results."""
+
+ def __init__(self, data, datatype):
+ """Stores a ListObjects result.
+
+ Args:
+ data: Root object, either an apitools Object or a string Prefix.
+ datatype: CsObjectOrPrefixType of data.
+ """
+ self.data = data
+ self.datatype = datatype
+
+ def ListObjects(self, bucket_name, prefix=None, delimiter=None,
+ all_versions=None, provider=None, fields=None):
+ """Lists objects (with metadata) and prefixes in a bucket.
+
+ Args:
+ bucket_name: Bucket containing the objects.
+ prefix: Prefix for directory-like behavior.
+ delimiter: Delimiter for directory-like behavior.
+ all_versions: If true, list all object versions.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+      fields: If present, return only these metadata fields for the listing,
+ for example:
+ ['items/acl', 'items/updated', 'prefixes'].
+ Note that the WildcardIterator class should be used to list
+ objects instead of calling this function directly. It amends
+ the fields definition from get-like syntax such as
+ ['acl', 'updated'] so that the caller does not need to
+ prepend 'items/' or specify any fields necessary for listing
+ (such as prefixes or nextPageToken).
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Iterator over CsObjectOrPrefix wrapper class.
+ """
+ raise NotImplementedError('ListObjects must be overloaded')
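Because results come back wrapped in the CsObjectOrPrefix container defined above, callers branch on the datatype field to tell objects apart from bucket "subdirectories". A sketch of consuming the iterator, where gsutil_api stands for any concrete CloudApi implementation and the bucket name is a placeholder:

    from gslib.cloud_api import CloudApi

    for result in gsutil_api.ListObjects('my-bucket', delimiter='/'):
      if result.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
        print result.data.name   # an apitools Object message
      else:
        print result.data        # a prefix string for a bucket "subdirectory"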
+
+ def GetObjectMetadata(self, bucket_name, object_name, generation=None,
+ provider=None, fields=None):
+ """Gets object metadata.
+
+ Args:
+ bucket_name: Bucket containing the object.
+ object_name: Object name.
+ generation: Generation of the object to retrieve.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+ fields: If present, return only these Object metadata fields, for
+ example, ['acl', 'updated'].
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Object object.
+ """
+ raise NotImplementedError('GetObjectMetadata must be overloaded')
+
+ def PatchObjectMetadata(self, bucket_name, object_name, metadata,
+                          generation=None, preconditions=None, provider=None,
+ fields=None):
+ """Updates object metadata with patch semantics.
+
+ Args:
+ bucket_name: Bucket containing the object.
+ object_name: Object name for object.
+ metadata: Object object defining metadata to be updated.
+ generation: Generation (or version) of the object to update.
+ preconditions: Preconditions for the request.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+ fields: If present, return only these Object metadata fields.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Updated object metadata.
+ """
+ raise NotImplementedError('PatchObjectMetadata must be overloaded')
+
+ class DownloadStrategy(object):
+ """Enum class for specifying download strategy."""
+ ONE_SHOT = 'oneshot'
+ RESUMABLE = 'resumable'
+
+ def GetObjectMedia(self, bucket_name, object_name, download_stream,
+ provider=None, generation=None, object_size=None,
+                     download_strategy=DownloadStrategy.ONE_SHOT, start_byte=0,
+ end_byte=None, progress_callback=None,
+ serialization_data=None, digesters=None):
+ """Gets object data.
+
+ Args:
+ bucket_name: Bucket containing the object.
+ object_name: Object name.
+ download_stream: Stream to send the object data to.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+ generation: Generation of the object to retrieve.
+ object_size: Total size of the object being downloaded.
+ download_strategy: Cloud API download strategy to use for download.
+      start_byte: Starting point for download (for resumable downloads and
+                  range requests). Can be set to negative to request a range
+                  of bytes (python equivalent of [:-3])
+      end_byte: Ending point for download (for range requests).
+      progress_callback: Optional callback function for progress notifications.
+                         Receives calls with arguments
+                         (bytes_transferred, total_size).
+      serialization_data: Implementation-specific dict containing serialization
+                          information for the download.
+      digesters: Dict of {string : digester}, where string is a name of a hash
+                 algorithm, and digester is a validation digester that supports
+                 update(bytes) and digest() using that algorithm.
+                 Implementation can set the digester value to None to indicate
+                 bytes were not successfully digested on-the-fly.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+      Content-encoding string if it was detected that the server sent an encoded
+      object during transfer, None otherwise.
+ """
+ raise NotImplementedError('GetObjectMedia must be overloaded')
+
+ def UploadObject(self, upload_stream, object_metadata, canned_acl=None,
+ size=None, preconditions=None, progress_callback=None,
+ provider=None, fields=None):
+ """Uploads object data and metadata.
+
+ Args:
+ upload_stream: Seekable stream of object data.
+ object_metadata: Object metadata for new object. Must include bucket
+ and object name.
+ canned_acl: Optional canned ACL to apply to object. Overrides ACL set
+ in object_metadata.
+ size: Optional object size.
+ preconditions: Preconditions for the request.
+      progress_callback: Optional callback function for progress notifications.
+ Receives calls with arguments
+ (bytes_transferred, total_size).
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+ fields: If present, return only these Object metadata fields.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Object object for newly created destination object.
+ """
+ raise NotImplementedError('UploadObject must be overloaded')
+
+ def UploadObjectStreaming(self, upload_stream, object_metadata,
+ canned_acl=None, preconditions=None,
+ progress_callback=None, provider=None,
+ fields=None):
+ """Uploads object data and metadata.
+
+ Args:
+ upload_stream: Stream of object data. May not be seekable.
+ object_metadata: Object metadata for new object. Must include bucket
+ and object name.
+ canned_acl: Optional canned ACL to apply to object. Overrides ACL set
+ in object_metadata.
+ preconditions: Preconditions for the request.
+      progress_callback: Optional callback function for progress notifications.
+ Receives calls with arguments
+ (bytes_transferred, total_size), but fills in only
+ bytes_transferred.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+ fields: If present, return only these Object metadata fields.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Object object for newly created destination object.
+ """
+ raise NotImplementedError('UploadObject must be overloaded')
+
+ def UploadObjectResumable(
+ self, upload_stream, object_metadata, canned_acl=None,
+ size=None, preconditions=None, serialization_data=None,
+ tracker_callback=None, progress_callback=None, provider=None,
+ fields=None):
+ """Uploads object data and metadata using a resumable upload strategy.
+
+ Args:
+ upload_stream: Seekable stream of object data.
+ object_metadata: Object metadata for new object. Must include bucket
+ and object name.
+ canned_acl: Optional canned ACL to apply to object. Overrides ACL set
+ in object_metadata.
+ size: Total size of the object.
+ preconditions: Preconditions for the request.
+      serialization_data: Dict of {'url' : UploadURL} allowing for uploads to
+                          be resumed.
+      tracker_callback: Callback function taking an upload URL string.
+                        Guaranteed to be called when the implementation gets an
+                        upload URL, allowing the caller to resume the upload
+                        across process breaks by saving the upload URL in
+                        a tracker file.
+      progress_callback: Optional callback function for progress notifications.
+ Receives calls with arguments
+ (bytes_transferred, total_size).
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+ fields: If present, return only these Object metadata fields when the
+ upload is complete.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Object object for newly created destination object.
+ """
+ raise NotImplementedError('UploadObjectResumable must be overloaded')
+
+ def CopyObject(self, src_bucket_name, src_obj_name, dst_obj_metadata,
+ src_generation=None, canned_acl=None, preconditions=None,
+ provider=None, fields=None):
+ """Copies an object in the cloud.
+
+ Args:
+ src_bucket_name: Bucket containing the source object
+ src_obj_name: Name of the source object.
+      dst_obj_metadata: Object metadata for new object. Must include bucket
+                        and object name.
+      src_generation: Generation of the source object to copy.
+      canned_acl: Optional canned ACL to apply to destination object. Overrides
+                  ACL set in dst_obj_metadata.
+ preconditions: Destination object preconditions for the request.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+ fields: If present, return only these Object metadata fields.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Object object for newly created destination object.
+ """
+ raise NotImplementedError('CopyObject must be overloaded')
+
+ def ComposeObject(self, src_objs_metadata, dst_obj_metadata,
+ preconditions=None, provider=None, fields=None):
+ """Composes an object in the cloud.
+
+ Args:
+      src_objs_metadata: List of ComposeRequest.SourceObjectsValueListEntries
+                         specifying the objects to compose.
+      dst_obj_metadata: Metadata for the destination object including bucket
+                        and object name.
+ preconditions: Destination object preconditions for the request.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+ fields: If present, return only these Object metadata fields.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Composed object metadata.
+ """
+ raise NotImplementedError('ComposeObject must be overloaded')
+
+ def DeleteObject(self, bucket_name, object_name, preconditions=None,
+ generation=None, provider=None):
+ """Deletes an object.
+
+ Args:
+ bucket_name: Name of the containing bucket.
+ object_name: Name of the object to delete.
+ preconditions: Preconditions for the request.
+ generation: Generation (or version) of the object to delete; if None,
+ deletes the live object.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ None.
+ """
+ raise NotImplementedError('DeleteObject must be overloaded')
+
+ def WatchBucket(self, bucket_name, address, channel_id, token=None,
+ provider=None, fields=None):
+    """Creates a notification subscription for changes to objects in a bucket.
+
+ Args:
+ bucket_name: Bucket containing the objects.
+ address: Address to which to send notifications.
+ channel_id: Unique ID string for the channel.
+ token: If present, token string is delivered with each notification.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+ fields: If present, return only these Channel metadata fields.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Channel object describing the notification subscription.
+ """
+ raise NotImplementedError('WatchBucket must be overloaded')
+
+ def StopChannel(self, channel_id, resource_id, provider=None):
+ """Stops a notification channel.
+
+ Args:
+ channel_id: Unique ID string for the channel.
+ resource_id: Version-agnostic ID string for the channel.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ None.
+ """
+ raise NotImplementedError('StopChannel must be overloaded')
+
+
+class Preconditions(object):
+ """Preconditions class for specifying preconditions to cloud API
requests."""
+
+ def __init__(self, gen_match=None, meta_gen_match=None):
+ """Instantiates a Preconditions object.
+
+ Args:
+ gen_match: Perform request only if generation of target object
+ matches the given integer. Ignored for bucket requests.
+ meta_gen_match: Perform request only if metageneration of target
+ object/bucket matches the given integer.
+ """
+ self.gen_match = gen_match
+ self.meta_gen_match = meta_gen_match
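For example, a caller that only wants to overwrite a specific version of an object could build the following; the numbers are placeholders, and the header mapping is the one implemented by _AddPreconditionsToHeaders in the XML layer above:

    from gslib.cloud_api import Preconditions

    # Proceed only if the live object is generation 5 and metageneration 2;
    # the XML implementation maps these onto x-goog-if-generation-match and
    # x-goog-if-metageneration-match headers.
    preconditions = Preconditions(gen_match=5, meta_gen_match=2)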
+
+
+class ArgumentException(Exception):
+ """Exception raised when arguments to a Cloud API method are invalid.
+
+ This exception is never raised as a result of a failed call to a cloud
+ storage provider.
+ """
+
+ def __init__(self, reason):
+ Exception.__init__(self)
+ self.reason = reason
+
+ def __repr__(self):
+ return str(self)
+
+ def __str__(self):
+ return '%s: %s' % (self.__class__.__name__, self.reason)
+
+
+class ProjectIdException(ArgumentException):
+ """Exception raised when a Project ID argument is required but not
present."""
+
+
+class ServiceException(Exception):
+ """Exception raised when a cloud storage provider request fails.
+
+ This exception is raised only as a result of a failed remote call.
+ """
+
+ def __init__(self, reason, status=None, body=None):
+ Exception.__init__(self)
+ self.reason = reason
+ self.status = status
+ self.body = body
+
+ def __repr__(self):
+ return str(self)
+
+ def __str__(self):
+ message = '%s:' % self.__class__.__name__
+ if self.status:
+ message += ' %s' % self.status
+ message += ' %s' % self.reason
+ if self.body:
+ message += '\n%s' % self.body
+ return message
+
+
+class RetryableServiceException(ServiceException):
+ """Exception class for retryable exceptions."""
+
+
+class ResumableDownloadException(RetryableServiceException):
+ """Exception raised for resumable downloads that can be retried later."""
+
+
+class ResumableUploadException(RetryableServiceException):
+ """Exception raised for resumable uploads that can be retried later."""
+
+
+class ResumableUploadAbortException(ServiceException):
+ """Exception raised for resumable uploads that cannot be retried
later."""
+
+
+class AuthenticationException(ServiceException):
+ """Exception raised for errors during the authentication process."""
+
+
+class PreconditionException(ServiceException):
+ """Exception raised for precondition failures."""
+
+
+class NotFoundException(ServiceException):
+ """Exception raised when a resource is not found (404)."""
+
+
+class NotEmptyException(ServiceException):
+ """Exception raised when trying to delete a bucket is not empty."""
+
+
+class BadRequestException(ServiceException):
+ """Exception raised for malformed requests.
+
+ Where it is possible to detect invalid arguments prior to sending them
+ to the server, an ArgumentException should be raised instead.
+ """
+
+
+class AccessDeniedException(ServiceException):
+ """Exception raised when authenticated user has insufficient access
rights.
+
+ This is raised when the authentication process succeeded but the
+ authenticated user does not have access rights to the requested
resource.
+ """
+
+
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/cloud_api_delegator.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,421 @@
+# -*- coding: utf-8 -*-
+# Copyright 2013 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Gsutil API delegator for interacting with cloud storage providers."""
+
+from __future__ import absolute_import
+
+import boto
+from boto import config
+from gslib.cloud_api import ArgumentException
+from gslib.cloud_api import CloudApi
+from gslib.cs_api_map import ApiMapConstants
+from gslib.cs_api_map import ApiSelector
+
+
+class CloudApiDelegator(CloudApi):
+ """Class that handles delegating requests to gsutil Cloud API
implementations.
+
+  This class is responsible for determining at runtime which gsutil Cloud API
+  implementation should service the request based on the Cloud storage
+  provider, command-level API support, and configuration file override.
+
+ During initialization it takes as an argument a gsutil_api_map which maps
+ providers to their default and supported gsutil Cloud API implementations
+ (see comments in cs_api_map for details).
+
+  Instantiation of multiple delegators per-thread is required for multiprocess
+ and/or multithreaded operations. Calling methods on the same delegator in
+ multiple threads is unsafe.
+ """
+
+ def __init__(self, bucket_storage_uri_class, gsutil_api_map, logger,
+ provider=None, debug=0):
+ """Performs necessary setup for delegating cloud storage requests.
+
+    This function has different arguments than the gsutil Cloud API __init__
+    function because of the delegation responsibilities of this class.
+
+ Args:
+ bucket_storage_uri_class: boto storage_uri class, used by APIs that
+ provide boto translation or mocking.
+      gsutil_api_map: Map of providers and API selector tuples to api classes
+                      which can be used to communicate with those providers.
+      logger: logging.logger for outputting log messages.
+      provider: Default provider prefix describing cloud storage provider to
+                connect to.
+ debug: Debug level for the API implementation (0..3).
+ """
+    super(CloudApiDelegator, self).__init__(bucket_storage_uri_class, logger,
+ provider=provider, debug=debug)
+ self.api_map = gsutil_api_map
+ self.prefer_api = boto.config.get('GSUtil', 'prefer_api', '').upper()
+ self.loaded_apis = {}
+
+ if not self.api_map[ApiMapConstants.API_MAP]:
+      raise ArgumentException('No apiclass supplied for gsutil Cloud API map.')
+
+ def _GetApi(self, provider):
+ """Returns a valid CloudApi for use by the caller.
+
+ This function lazy-loads connection and credentials using the API map
+ and credential store provided during class initialization.
+
+ Args:
+      provider: Provider to load API for. If None, class-wide default is used.
+
+ Raises:
+      ArgumentException if there is no matching API available in the API map.
+
+ Returns:
+ Valid API instance that can be used to communicate with the Cloud
+ Storage provider.
+ """
+ provider = provider or self.provider
+ if not provider:
+ raise ArgumentException('No provider selected for _GetApi')
+
+ provider = str(provider)
+ if provider not in self.loaded_apis:
+ self.loaded_apis[provider] = {}
+
+ api_selector = self.GetApiSelector(provider)
+ if api_selector not in self.loaded_apis[provider]:
+ # Need to load the API.
+ self._LoadApi(provider, api_selector)
+
+ return self.loaded_apis[provider][api_selector]
+
+ def _LoadApi(self, provider, api_selector):
+ """Loads a CloudApi into the loaded_apis map for this class.
+
+ Args:
+ provider: Provider to load the API for.
+ api_selector: cs_api_map.ApiSelector defining the API type.
+ """
+ if provider not in self.api_map[ApiMapConstants.API_MAP]:
+ raise ArgumentException(
+          'gsutil Cloud API map contains no entry for provider %s.' % provider)
+ if api_selector not in self.api_map[ApiMapConstants.API_MAP][provider]:
+ raise ArgumentException(
+ 'gsutil Cloud API map does not support API %s for provider %s.' %
+ (api_selector, provider))
+ self.loaded_apis[provider][api_selector] = (
+ self.api_map[ApiMapConstants.API_MAP][provider][api_selector](
+ self.bucket_storage_uri_class,
+ self.logger,
+ provider=provider,
+ debug=self.debug))
+
+ def GetApiSelector(self, provider=None):
+ """Returns a cs_api_map.ApiSelector based on input and configuration.
+
+ Args:
+      provider: Provider to return the ApiSelector for. If None, class-wide
+                default is used.
+
+ Returns:
+ cs_api_map.ApiSelector that will be used for calls to the delegator
+ for this provider.
+ """
+ selected_provider = provider or self.provider
+ if not selected_provider:
+ raise ArgumentException('No provider selected for CloudApi')
+
+    if (selected_provider not in self.api_map[ApiMapConstants.DEFAULT_MAP] or
+ self.api_map[ApiMapConstants.DEFAULT_MAP][selected_provider] not in
+ self.api_map[ApiMapConstants.API_MAP][selected_provider]):
+ raise ArgumentException('No default api available for provider %s' %
+ selected_provider)
+
+ if selected_provider not in self.api_map[ApiMapConstants.SUPPORT_MAP]:
+      raise ArgumentException('No supported apis available for provider %s' %
+ selected_provider)
+
+ api = self.api_map[ApiMapConstants.DEFAULT_MAP][selected_provider]
+
+    # If we have only HMAC credentials for Google Cloud Storage, we must use
+    # the XML API as the JSON API does not support HMAC.
+    #
+    # Technically if we have only HMAC credentials, we should still be able to
+    # access public read resources via the JSON API, but the XML API can do
+    # that just as well. It is better to use it than inspect the credentials on
+    # every HTTP call.
+    if (provider == 'gs' and
+        not config.has_option('Credentials', 'gs_oauth2_refresh_token') and
+        not (config.has_option('Credentials', 'gs_service_client_id')
+             and config.has_option('Credentials', 'gs_service_key_file')) and
+        (config.has_option('Credentials', 'gs_access_key_id')
+         and config.has_option('Credentials', 'gs_secret_access_key'))):
+ api = ApiSelector.XML
+ # Try to force the user's preference to a supported API.
+ elif self.prefer_api in (self.api_map[ApiMapConstants.SUPPORT_MAP]
+ [selected_provider]):
+ api = self.prefer_api
+ return api
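The prefer_api override consulted above comes from the [GSUtil] section of the boto configuration file; for example, a user who wants to force the XML API for providers that support it could add something like the following to their .boto file (the value shown is only an illustration):

  [GSUtil]
  prefer_api = xml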
+
+ # For function docstrings, see CloudApi class.
+ def GetBucket(self, bucket_name, provider=None, fields=None):
+ return self._GetApi(provider).GetBucket(bucket_name, fields=fields)
+
+ def ListBuckets(self, project_id=None, provider=None, fields=None):
+ return self._GetApi(provider).ListBuckets(project_id=project_id,
+ fields=fields)
+
+ def PatchBucket(self, bucket_name, metadata, preconditions=None,
+ provider=None, fields=None):
+ return self._GetApi(provider).PatchBucket(
+ bucket_name, metadata, preconditions=preconditions, fields=fields)
+
+ def CreateBucket(self, bucket_name, project_id=None, metadata=None,
+ provider=None, fields=None):
+ return self._GetApi(provider).CreateBucket(
+        bucket_name, project_id=project_id, metadata=metadata, fields=fields)
+
+ def DeleteBucket(self, bucket_name, preconditions=None, provider=None):
+ return self._GetApi(provider).DeleteBucket(bucket_name,
+ preconditions=preconditions)
+
+ def ListObjects(self, bucket_name, prefix=None, delimiter=None,
+ all_versions=None, provider=None, fields=None):
+ return self._GetApi(provider).ListObjects(
+ bucket_name, prefix=prefix, delimiter=delimiter,
+ all_versions=all_versions, fields=fields)
+
+ def GetObjectMetadata(self, bucket_name, object_name, generation=None,
+ provider=None, fields=None):
+ return self._GetApi(provider).GetObjectMetadata(
+ bucket_name, object_name, generation=generation, fields=fields)
+
+ def PatchObjectMetadata(self, bucket_name, object_name, metadata,
+                          generation=None, preconditions=None, provider=None,
+ fields=None):
+ return self._GetApi(provider).PatchObjectMetadata(
+ bucket_name, object_name, metadata, generation=generation,
+ preconditions=preconditions, fields=fields)
+
+ def GetObjectMedia(
+ self, bucket_name, object_name, download_stream, provider=None,
+ generation=None, object_size=None,
+ download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
+ start_byte=0, end_byte=None, progress_callback=None,
+ serialization_data=None, digesters=None):
+ return self._GetApi(provider).GetObjectMedia(
+ bucket_name, object_name, download_stream,
+ download_strategy=download_strategy, start_byte=start_byte,
+ end_byte=end_byte, generation=generation, object_size=object_size,
+ progress_callback=progress_callback,
+ serialization_data=serialization_data, digesters=digesters)
+
+ def UploadObject(self, upload_stream, object_metadata, size=None,
+                   canned_acl=None, preconditions=None, progress_callback=None,
+ provider=None, fields=None):
+ return self._GetApi(provider).UploadObject(
+ upload_stream, object_metadata, size=size, canned_acl=canned_acl,
+ preconditions=preconditions, progress_callback=progress_callback,
+ fields=fields)
+
+ def UploadObjectStreaming(self, upload_stream, object_metadata,
+ canned_acl=None, preconditions=None,
+                            progress_callback=None, provider=None, fields=None):
+ return self._GetApi(provider).UploadObjectStreaming(
+ upload_stream, object_metadata, canned_acl=canned_acl,
+ preconditions=preconditions, progress_callback=progress_callback,
+ fields=fields)
+
+ def UploadObjectResumable(
+      self, upload_stream, object_metadata, canned_acl=None, preconditions=None,
+ provider=None, fields=None, size=None, serialization_data=None,
+ tracker_callback=None, progress_callback=None):
+ return self._GetApi(provider).UploadObjectResumable(
+ upload_stream, object_metadata, canned_acl=canned_acl,
+ preconditions=preconditions, size=size, fields=fields,
+ serialization_data=serialization_data,
+        tracker_callback=tracker_callback, progress_callback=progress_callback)
+
+ def CopyObject(self, src_bucket_name, src_obj_name, dst_obj_metadata,
+ src_generation=None, canned_acl=None, preconditions=None,
+ provider=None, fields=None):
+ return self._GetApi(provider).CopyObject(
+ src_bucket_name, src_obj_name, dst_obj_metadata,
+ src_generation=src_generation, canned_acl=canned_acl,
+ preconditions=preconditions, fields=fields)
+
+ def ComposeObject(self, src_objs_metadata, dst_obj_metadata,
+ preconditions=None, provider=None, fields=None):
+ return self._GetApi(provider).ComposeObject(
+ src_objs_metadata, dst_obj_metadata, preconditions=preconditions,
+ fields=fields)
+
+ def DeleteObject(self, bucket_name, object_name, preconditions=None,
+ generation=None, provider=None):
+ return self._GetApi(provider).DeleteObject(
+ bucket_name, object_name, preconditions=preconditions,
+ generation=generation)
+
+ def WatchBucket(self, bucket_name, address, channel_id, token=None,
+ provider=None, fields=None):
+ return self._GetApi(provider).WatchBucket(
+ bucket_name, address, channel_id, token=token, fields=fields)
+
+ def StopChannel(self, channel_id, resource_id, provider=None):
+ return self._GetApi(provider).StopChannel(channel_id, resource_id)
+
+  def XmlPassThroughGetAcl(self, storage_url, def_obj_acl=False, provider=None):
+ """XML compatibility function for getting ACLs.
+
+ Args:
+ storage_url: StorageUrl object.
+ def_obj_acl: If true, get the default object ACL on a bucket.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ ACL XML for the resource specified by storage_url.
+ """
+    return self._GetApi(provider).XmlPassThroughGetAcl(storage_url,
+                                                        def_obj_acl=def_obj_acl)
+
+ def XmlPassThroughSetAcl(self, acl_text, storage_url, canned=True,
+ def_obj_acl=False, provider=None):
+ """XML compatibility function for setting ACLs.
+
+ Args:
+ acl_text: XML ACL or canned ACL string.
+ storage_url: StorageUrl object.
+ canned: If true, acl_text is treated as a canned ACL string.
+ def_obj_acl: If true, set the default object ACL on a bucket.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ None.
+ """
+ self._GetApi(provider).XmlPassThroughSetAcl(
+ acl_text, storage_url, canned=canned, def_obj_acl=def_obj_acl)
+
+ def XmlPassThroughGetCors(self, storage_url, provider=None):
+ """XML compatibility function for getting CORS configuration on a
bucket.
+
+ Args:
+ storage_url: StorageUrl object.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ CORS configuration XML for the bucket specified by storage_url.
+ """
+ return self._GetApi(provider).XmlPassThroughGetCors(storage_url)
+
+ def XmlPassThroughSetCors(self, cors_text, storage_url, provider=None):
+ """XML compatibility function for setting CORS configuration on a
bucket.
+
+ Args:
+ cors_text: Raw CORS XML string.
+ storage_url: StorageUrl object.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ None.
+ """
+ self._GetApi(provider).XmlPassThroughSetCors(cors_text, storage_url)
+
+ def XmlPassThroughGetLifecycle(self, storage_url, provider=None):
+ """XML compatibility function for getting lifecycle config on a bucket.
+
+ Args:
+ storage_url: StorageUrl object.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Lifecycle configuration XML for the bucket specified by storage_url.
+ """
+ return self._GetApi(provider).XmlPassThroughGetLifecycle(storage_url)
+
+ def XmlPassThroughSetLifecycle(self, lifecycle_text, storage_url,
+ provider=None):
+ """XML compatibility function for setting CORS configuration on a
bucket.
+
+ Args:
+ lifecycle_text: Raw lifecycle configuration XML string.
+ storage_url: StorageUrl object.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ None.
+ """
+ self._GetApi(provider).XmlPassThroughSetLifecycle(lifecycle_text,
+ storage_url)
+
+ def XmlPassThroughGetLogging(self, storage_url, provider=None):
+ """XML compatibility function for getting logging configuration on a
bucket.
+
+ Args:
+ storage_url: StorageUrl object.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Logging configuration XML for the bucket specified by storage_url.
+ """
+ return self._GetApi(provider).XmlPassThroughGetLogging(storage_url)
+
+ def XmlPassThroughGetWebsite(self, storage_url, provider=None):
+ """XML compatibility function for getting website configuration on a
bucket.
+
+ Args:
+ storage_url: StorageUrl object.
+ provider: Cloud storage provider to connect to. If not present,
+ class-wide default is used.
+
+ Raises:
+ ArgumentException for errors during input validation.
+ ServiceException for errors interacting with cloud storage providers.
+
+ Returns:
+ Website configuration XML for the bucket specified by storage_url.
+ """
+ return self._GetApi(provider).XmlPassThroughGetWebsite(storage_url)
+
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/cloud_api_helper.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Helper functions for Cloud API implementations."""
+
+from __future__ import absolute_import
+
+from gslib.cloud_api import ArgumentException
+
+
+def ValidateDstObjectMetadata(dst_obj_metadata):
+ """Ensures dst_obj_metadata supplies the needed fields for copy and
insert.
+
+ Args:
+ dst_obj_metadata: Metadata to validate.
+
+ Raises:
+ ArgumentException if metadata is invalid.
+ """
+ if not dst_obj_metadata:
+ raise ArgumentException(
+ 'No object metadata supplied for destination object.')
+ if not dst_obj_metadata.name:
+ raise ArgumentException(
+        'Object metadata supplied for destination object had no object name.')
+ if not dst_obj_metadata.bucket:
+ raise ArgumentException(
+        'Object metadata supplied for destination object had no bucket name.')
+
+
+def GetDownloadSerializationDict(src_obj_metadata):
+ """Returns a baseline serialization dict from the source object metadata.
+
+ There are four entries:
+ auto_transfer: JSON-specific field, always False.
+ progress: How much of the download has already been completed. Caller
+ should override this value if the download is being resumed.
+ total_size: Total object size.
+ url: Implementation-specific field used for saving a metadata get call.
+         For JSON, this is the download URL of the object.
+ For XML, this is a pickled boto key.
+
+ Args:
+ src_obj_metadata: Object to be downloaded.
+
+ Returns:
+ Serialization dict for use with Cloud API GetObjectMedia.
+ """
+ return {
+ 'auto_transfer': 'False',
+ 'progress': 0,
+ 'total_size': src_obj_metadata.size,
+ 'url': src_obj_metadata.mediaLink
+ }
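For a JSON-API object, the returned dict would look roughly like the sketch below; all values are invented for illustration, and the url entry is simply whatever mediaLink the service reported for the object:

  {'auto_transfer': 'False',
   'progress': 0,
   'total_size': 1048576,
   'url': 'https://www.googleapis.com/download/storage/v1/b/my-bucket/o/my-object?alt=media'}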
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/commands/hash.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,175 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Implementation of hash command for calculating hashes of local files."""
+
+from hashlib import md5
+import os
+
+import crcmod
+
+from gslib.command import Command
+from gslib.cs_api_map import ApiSelector
+from gslib.exception import CommandException
+from gslib.hashing_helper import Base64EncodeHash
+from gslib.hashing_helper import CalculateHashesFromContents
+from gslib.hashing_helper import SLOW_CRCMOD_WARNING
+from gslib.progress_callback import ConstructAnnounceText
+from gslib.progress_callback import FileProgressCallbackHandler
+from gslib.progress_callback import ProgressCallbackWithBackoff
+from gslib.storage_url import StorageUrlFromString
+from gslib.util import NO_MAX
+from gslib.util import UsingCrcmodExtension
+
+_DETAILED_HELP_TEXT = ("""
+<B>SYNOPSIS</B>
+ gsutil [-c] [-h] [-m] hash filename...
+
+<B>DESCRIPTION</B>
+  The hash command calculates hashes on a local file that can be used to compare
+ with gsutil ls -L output. If a specific hash option is not provided, this
+ command calculates all gsutil-supported hashes for the file.
+
+ Note that gsutil automatically performs hash validation when uploading or
+ downloading files, so this command is only needed if you want to write a
+ script that separately checks the hash for some reason.
+
+ If you calculate a CRC32c hash for the file without a precompiled crcmod
+  installation, hashing will be very slow. See "gsutil help crcmod" for details.
+
+<B>OPTIONS</B>
+ -c Calculate a CRC32c hash for the file.
+
+ -h Output hashes in hex format. By default, gsutil uses base64.
+
+  -m Calculate an MD5 hash for the file.
+""")
+
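To reproduce these digests outside gsutil, a rough equivalent using crcmod and hashlib directly might look like the sketch below (the file name is hypothetical; base64-encoding the raw digest matches the command's default base64 output):

  import base64
  from hashlib import md5

  import crcmod

  crc = crcmod.predefined.Crc('crc-32c')
  md5_hash = md5()
  # Hash the file in chunks so large files do not need to fit in memory.
  with open('some-local-file', 'rb') as fp:
      for chunk in iter(lambda: fp.read(8192), b''):
          crc.update(chunk)
          md5_hash.update(chunk)
  print('crc32c: %s' % base64.b64encode(crc.digest()))
  print('md5:    %s' % base64.b64encode(md5_hash.digest()))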
+
+class HashCommand(Command):
+ """Implementation of gsutil hash command."""
+
+ # Command specification. See base class for documentation.
+ command_spec = Command.CreateCommandSpec(
+ 'hash',
+ command_name_aliases=[],
+ min_args=1,
+ max_args=NO_MAX,
+ supported_sub_args='chm',
+ file_url_ok=True,
+ provider_url_ok=False,
+ urls_start_arg=0,
+ gs_api_support=[ApiSelector.JSON],
+ gs_default_api=ApiSelector.JSON,
+ )
+ # Help specification. See help_provider.py for documentation.
+ help_spec = Command.HelpSpec(
+ help_name='hash',
+ help_name_aliases=['checksum'],
+ help_type='command_help',
+ help_one_line_summary='Calculate file hashes',
+ help_text=_DETAILED_HELP_TEXT,
+ subcommand_help_text={},
+ )
+
+ @classmethod
+ def _ParseOpts(cls, sub_opts, logger):
+ """Returns behavior variables based on input options.
+
+ Args:
+ sub_opts: getopt sub-arguments for the command.
+ logger: logging.Logger for the command.
+
+ Returns:
+ Tuple of
+      calc_crc32c: Boolean, if True, command should calculate a CRC32c checksum.
+      calc_md5: Boolean, if True, command should calculate an MD5 hash.
+      format_func: Function used for formatting the hash in the desired format.
+ output_format: String describing the hash output format.
+ """
+ calc_crc32c = False
+ calc_md5 = False
+ format_func = lambda digest: Base64EncodeHash(digest.hexdigest())
+ found_hash_option = False
+ output_format = 'base64'
+
+ if sub_opts:
+ for o, unused_a in sub_opts:
+ if o == '-c':
+ calc_crc32c = True
+ found_hash_option = True
+ elif o == '-h':
+ output_format = 'hex'
+ format_func = lambda digest: digest.hexdigest()
+ elif o == '-m':
+ calc_md5 = True
+ found_hash_option = True
+
+ if not found_hash_option:
+ calc_crc32c = True
+ calc_md5 = True
+
+ if calc_crc32c and not UsingCrcmodExtension(crcmod):
+ logger.warn(SLOW_CRCMOD_WARNING)
+
+ return calc_crc32c, calc_md5, format_func, output_format
+
+ def _GetHashClassesFromArgs(self, calc_crc32c, calc_md5):
+ """Constructs the dictionary of hashes to compute based on the
arguments.
+
+ Args:
+ calc_crc32c: If True, CRC32c should be included.
+ calc_md5: If True, MD5 should be included.
+
+ Returns:
+      Dictionary of {string: hash digester}, where string is the name of the
+ digester algorithm.
+ """
+ hash_dict = {}
+ if calc_crc32c:
+ hash_dict['crc32c'] = crcmod.predefined.Crc('crc-32c')
+ if calc_md5:
+ hash_dict['md5'] = md5()
+ return hash_dict
+
+ def RunCommand(self):
+ """Command entry point for the hash command."""
+ (calc_crc32c, calc_md5, format_func, output_format) = (
+ self._ParseOpts(self.sub_opts, self.logger))
+
+ matched_one = False
+ for url_str in self.args:
+ if not StorageUrlFromString(url_str).IsFileUrl():
+ raise CommandException('"hash" command requires a file URL')
+
+ for file_ref in self.WildcardIterator(url_str).IterObjects():
+ matched_one = True
+ file_name = file_ref.storage_url.object_name
+ file_size = os.path.getsize(file_name)
+ callback_processor = ProgressCallbackWithBackoff(
+ file_size, FileProgressCallbackHandler(
+              ConstructAnnounceText('Hashing', file_name), self.logger).call)
+ hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
+ with open(file_name, 'rb') as fp:
+          CalculateHashesFromContents(fp, hash_dict,
+                                      callback_processor=callback_processor)
+ print 'Hashes [%s] for %s:' % (output_format, file_name)
+ for name, digest in hash_dict.iteritems():
+ print '\tHash (%s):\t\t%s' % (name, format_func(digest))
+
+ if not matched_one:
+ raise CommandException('No files matched')
+
+ return 0
+
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/commands/rsync.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,851 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Implementation of Unix-like rsync command."""
+
+from __future__ import absolute_import
+
+import errno
+import heapq
+import io
+from itertools import islice
+import os
+import tempfile
+import textwrap
+import traceback
+import urllib
+
+from boto import config
+import crcmod
+
+from gslib import copy_helper
+from gslib.cloud_api import NotFoundException
+from gslib.command import Command
+from gslib.command import DummyArgChecker
+from gslib.copy_helper import CreateCopyHelperOpts
+from gslib.cs_api_map import ApiSelector
+from gslib.exception import CommandException
+from gslib.hashing_helper import CalculateB64EncodedCrc32cFromContents
+from gslib.hashing_helper import CalculateB64EncodedMd5FromContents
+from gslib.hashing_helper import SLOW_CRCMOD_WARNING
+from gslib.plurality_checkable_iterator import PluralityCheckableIterator
+from gslib.storage_url import StorageUrlFromString
+from gslib.util import GetCloudApiInstance
+from gslib.util import IsCloudSubdirPlaceholder
+from gslib.util import TEN_MB
+from gslib.util import UsingCrcmodExtension
+from gslib.util import UTF8
+from gslib.wildcard_iterator import CreateWildcardIterator
+
+
+_DETAILED_HELP_TEXT = ("""
+<B>SYNOPSIS</B>
+ gsutil rsync [-c] [-C] [-d] [-e] [-n] [-p] [-R] src_url dst_url
+
+
+<B>DESCRIPTION</B>
+ The gsutil rsync command makes the contents under dst_url the same as the
+ contents under src_url, by copying any missing files/objects, and (if the
+ -d option is specified) deleting any extra files/objects. For example, to
+ make gs://mybucket/data match the contents of the local directory "data"
+ you could do:
+
+ gsutil rsync -d data gs://mybucket/data
+
+ To recurse into directories use the -r option:
+
+ gsutil rsync -d -r data gs://mybucket/data
+
+ To copy only new/changed files without deleting extra files from
+ gs://mybucket/data leave off the -d option:
+
+ gsutil rsync -r data gs://mybucket/data
+
+  If you have a large number of objects to synchronize you might want to use the
+ gsutil -m option, to perform parallel (multi-threaded/multi-processing)
+ synchronization:
+
+ gsutil -m rsync -d -r data gs://mybucket/data
+
+  The -m option typically will provide a large performance boost if either the
+ source or destination (or both) is a cloud URL. If both source and
+  destination are file URLs the -m option will typically thrash the disk and
+ slow synchronization down.
+
+ To make the local directory "data" the same as the contents of
+ gs://mybucket/data:
+
+ gsutil rsync -d -r gs://mybucket/data data
+
+ To make the contents of gs://mybucket2 the same as gs://mybucket1:
+
+ gsutil rsync -d -r gs://mybucket1 gs://mybucket2
+
+ You can also mirror data across local directories:
+
+ gsutil rsync -d -r dir1 dir2
+
+ To mirror your content across clouds:
+
+ gsutil rsync -d -r gs://my-gs-bucket s3://my-s3-bucket
+
+  Note: If you are synchronizing a large amount of data between clouds you
+  might consider setting up a
+  `Google Compute Engine <https://cloud.google.com/products/compute-engine>`_
+  account and running gsutil there. Since cross-provider gsutil data transfers
+  flow through the machine where gsutil is running, doing this can make your
+  transfer run significantly faster than running gsutil on your local
+  workstation.
+
+
+<B>CHECKSUM VALIDATION AND FAILURE HANDLING</B>
+  At the end of every upload or download, the gsutil rsync command validates
+  that the checksum of the source file/object matches the checksum of the
+  destination file/object. If the checksums do not match, gsutil will delete
+  the invalid copy and print a warning message. This very rarely happens, but
+  if it does, please contact gs-...@google.com.
+
+  The rsync command will retry when failures occur, but if enough failures
+  happen during a particular copy or delete operation the command will skip
+  that object and move on. At the end of the synchronization run if any
+  failures were not successfully retried, the rsync command will report the
+  count of failures, and exit with non-zero status. At this point you can run
+  the rsync command again, and it will attempt any remaining needed copy
+  and/or delete operations.
+
+  Note that there are cases where retrying will never succeed, such as if you
+  don't have write permission to the destination bucket or if the destination
+  path for some objects is longer than the maximum allowed length.
+
+ For more details about gsutil's retry handling, please see
+ "gsutil help retries".
+
+
+<B>CHANGE DETECTION ALGORITHM</B>
+  To determine if a file or object has changed gsutil rsync first checks
+  whether the source and destination sizes match. If they match, it next
+  checks if their checksums match, using whatever checksums are available
+  (see below). Unlike the Unix rsync command, gsutil rsync does not use
+  timestamps to determine if the file/object changed, because the GCS API
+  does not permit the caller to set an object's timestamp (hence, timestamps
+  of identical files/objects cannot be made to match).
+
+  Checksums will not be available in two cases:
+
+  1. When synchronizing to or from a file system. By default, gsutil does not
+  checksum files, because of the slowdown caused when working with large
+  files. You can cause gsutil to checksum files by using the
+  gsutil rsync -c option, at the cost of increased local disk I/O and run
+  time when working with large files.
+
+  2. When comparing composite GCS objects with objects at a cloud provider
+  that does not support CRC32C (which is the only checksum available for
+  composite objects). See 'gsutil help compose' for details about composite
+  objects.
+
+
+<B>COPYING IN THE CLOUD AND METADATA PRESERVATION</B>
+  If both the source and destination URL are cloud URLs from the same
+  provider, gsutil copies data "in the cloud" (i.e., without downloading to
+  and uploading from the machine where you run gsutil). In addition to the
+  performance and cost advantages of doing this, copying in the cloud
+  preserves metadata (like Content-Type and Cache-Control). In contrast, when
+  you download data from the cloud it ends up in a file, which has no
+  associated metadata. Thus, unless you have some way to hold on to or
+  re-create that metadata, synchronizing a bucket to a directory in the local
+  file system will not retain the metadata.
+
+  Note that by default, the gsutil rsync command does not copy the ACLs of
+  objects being synchronized and instead will use the default bucket ACL (see
+  "gsutil help defacl"). You can override this behavior with the -p option
+  (see OPTIONS below).
+
+
+<B>SLOW CHECKSUMS</B>
+ If you find that CRC32C checksum computation runs slowly, this is likely
+ because you don't have a compiled CRC32c on your system. Try running:
+
+ gsutil ver -l
+
+ If the output contains:
+
+ compiled crcmod: False
+
+  you are running a Python library for computing CRC32C, which is much slower
+  than using the compiled code. For information on getting a compiled CRC32C
+  implementation, see 'gsutil help crc32c'.
+
+
+<B>LIMITATIONS</B>
+  1. The gsutil rsync command doesn't make the destination object's timestamps
+  match those of the source object (it can't; timestamp setting is not
+  allowed by the GCS API).
+
+  2. The gsutil rsync command ignores versioning, synchronizing only the live
+  object versions in versioned buckets.
+
+
+<B>OPTIONS</B>
+  -c    Causes the rsync command to compute checksums for files if the
+        size of source and destination match, and then compare
+        checksums. This option increases local disk I/O and run time
+        if either src_url or dst_url are on the local file system.
+
+  -C    If an error occurs, continue to attempt to copy the remaining
+        files. If errors occurred, gsutil's exit status will be non-zero
+        even if this flag is set. This option is implicitly set when
+        running "gsutil -m rsync...". Note: -C only applies to the
+        actual copying operation. If an error occurs while iterating
+        over the files in the local directory (e.g., invalid Unicode
+        file name) gsutil will print an error message and abort.
+
+  -d    Delete extra files under dst_url not found under src_url. By
+        default extra files are not deleted.
+
+  -e    Exclude symlinks. When specified, symbolic links will be
+        ignored.
+
+  -n    Causes rsync to run in "dry run" mode, i.e., just outputting
+        what would be copied or deleted without actually doing any
+        copying/deleting.
+
+  -p    Causes ACLs to be preserved when synchronizing in the cloud.
+        Note that this option has performance and cost implications when
+        using the XML API, as it requires separate HTTP calls for
+        interacting with ACLs. The performance issue can be mitigated to
+        some degree by using gsutil -m rsync to cause parallel
+        synchronization. Also, this option only works if you have OWNER
+        access to all of the objects that are copied.
+
+        You can avoid the additional performance and cost of using
+        rsync -p if you want all objects in the destination bucket to
+        end up with the same ACL by setting a default object ACL on that
+        bucket instead of using rsync -p. See 'help gsutil defacl'.
+
+  -R, -r Causes directories, buckets, and bucket subdirectories to be
+        synchronized recursively. If you neglect to use this option
+        gsutil will make only the top-level directory in the source
+        and destination URLs match, skipping any sub-directories.
+""")
+
+
+class _DiffAction(object):
+ COPY = 'copy'
+ REMOVE = 'remove'
+
+
+_NA = '-'
+_OUTPUT_BUFFER_SIZE = 64 * 1024
+_PROGRESS_REPORT_LISTING_COUNT = 10000
+
+
+class _DiffToApply(object):
+ """Class that encapsulates info needed to apply diff for one object."""
+
+ def __init__(self, src_url_str, dst_url_str, diff_action):
+ """Constructor.
+
+ Args:
+ src_url_str: The source URL string, or None if diff_action is REMOVE.
+ dst_url_str: The destination URL string.
+ diff_action: _DiffAction to be applied.
+ """
+ self.src_url_str = src_url_str
+ self.dst_url_str = dst_url_str
+ self.diff_action = diff_action
+
+
+def _DiffToApplyArgChecker(command_instance, diff_to_apply):
+ """Arg checker that skips symlinks if -e flag specified."""
+ if (diff_to_apply.diff_action == _DiffAction.REMOVE
+ or not command_instance.exclude_symlinks):
+ # No src URL is populated for REMOVE actions.
+ return True
+ exp_src_url = StorageUrlFromString(diff_to_apply.src_url_str)
+ if exp_src_url.IsFileUrl() and os.path.islink(exp_src_url.object_name):
+    command_instance.logger.info('Skipping symbolic link %s...', exp_src_url)
+ return False
+ return True
+
+
+def _ComputeNeededFileChecksums(logger, src_url_str, src_size, src_crc32c,
+ src_md5, dst_url_str, dst_size, dst_crc32c,
+ dst_md5):
+ """Computes any file checksums needed by _ObjectsMatch.
+
+ Args:
+ logger: logging.logger for outputting log messages.
+ src_url_str: Source URL string.
+ src_size: Source size
+ src_crc32c: Source CRC32c.
+ src_md5: Source MD5.
+ dst_url_str: Destination URL string.
+ dst_size: Destination size
+ dst_crc32c: Destination CRC32c.
+ dst_md5: Destination MD5.
+
+ Returns:
+ (src_crc32c, src_md5, dst_crc32c, dst_md5)
+ """
+ src_url = StorageUrlFromString(src_url_str)
+ dst_url = StorageUrlFromString(dst_url_str)
+ if src_url.IsFileUrl():
+ if dst_crc32c != _NA or dst_url.IsFileUrl():
+ if src_size > TEN_MB:
+        logger.info('Computing CRC32C for %s...', src_url_str)
+ with open(src_url.object_name, 'rb') as fp:
+ src_crc32c = CalculateB64EncodedCrc32cFromContents(fp)
+ elif dst_md5 != _NA or dst_url.IsFileUrl():
+ if dst_size > TEN_MB:
+        logger.info('Computing MD5 for %s...', src_url_str)
+ with open(src_url.object_name, 'rb') as fp:
+ src_md5 = CalculateB64EncodedMd5FromContents(fp)
+ if dst_url.IsFileUrl():
+ if src_crc32c != _NA:
+ if src_size > TEN_MB:
+        logger.info('Computing CRC32C for %s...', dst_url_str)
+ with open(dst_url.object_name, 'rb') as fp:
+ dst_crc32c = CalculateB64EncodedCrc32cFromContents(fp)
+ elif src_md5 != _NA:
+ if dst_size > TEN_MB:
+        logger.info('Computing MD5 for %s...', dst_url_str)
+ with open(dst_url.object_name, 'rb') as fp:
+ dst_md5 = CalculateB64EncodedMd5FromContents(fp)
+ return (src_crc32c, src_md5, dst_crc32c, dst_md5)
+
+
+def _ListUrlRootFunc(cls, args_tuple, thread_state=None):
+ """Worker function for listing files/objects under to be sync'd.
+
+  Outputs sorted list to out_file_name, formatted per _BuildTmpOutputLine. We
+ sort the listed URLs because we don't want to depend on consistent sort
+ order across file systems and cloud providers.
+
+ Args:
+ cls: Command instance.
+    args_tuple: (url_str, out_file_name, desc), where url_str is URL string to
+ list; out_file_name is name of file to which sorted output
+ should be written; desc is 'source' or 'destination'.
+ thread_state: gsutil Cloud API instance to use.
+ """
+ gsutil_api = GetCloudApiInstance(cls, thread_state=thread_state)
+ (url_str, out_file_name, desc) = args_tuple
+ # We sort while iterating over url_str, allowing parallelism of batched
+ # sorting with collecting the listing.
+ out_file = io.open(out_file_name, mode='w', encoding=UTF8)
+  _BatchSort(_FieldedListingIterator(cls, gsutil_api, url_str, desc), out_file)
+ out_file.close()
+
+
+def _FieldedListingIterator(cls, gsutil_api, url_str, desc):
+ """Iterator over url_str outputting lines formatted per
_BuildTmpOutputLine.
+
+ Args:
+ cls: Command instance.
+ gsutil_api: gsutil Cloud API instance to use for bucket listing.
+ url_str: The URL string over which to iterate.
+ desc: 'source' or 'destination'.
+
+ Yields:
+ Output line formatted per _BuildTmpOutputLine.
+ """
+ if cls.recursion_requested:
+ wildcard = '%s/**' % url_str.rstrip('/\\')
+ else:
+ wildcard = '%s/*' % url_str.rstrip('/\\')
+ i = 0
+ for blr in CreateWildcardIterator(
+ wildcard, gsutil_api, debug=cls.debug,
+ project_id=cls.project_id).IterObjects(
+ # Request just the needed fields, to reduce bandwidth usage.
+ bucket_listing_fields=['crc32c', 'md5Hash', 'name', 'size']):
+    # Various GUI tools (like the GCS web console) create placeholder objects
+    # ending with '/' when the user creates an empty directory. Normally these
+    # tools should delete those placeholders once objects have been written
+    # "under" the directory, but sometimes the placeholders are left around.
+    # We need to filter them out here, otherwise if the user tries to rsync
+    # from GCS to a local directory it will result in a directory/file
+    # conflict (e.g., trying to download an object called "mydata/" where the
+    # local directory "mydata" exists).
+    url = blr.storage_url
+    if IsCloudSubdirPlaceholder(url, blr=blr):
+      cls.logger.info('Skipping cloud sub-directory placeholder object %s',
+                      url)
+ continue
+ if (cls.exclude_symlinks and url.IsFileUrl()
+ and os.path.islink(url.object_name)):
+ continue
+ i += 1
+ if i % _PROGRESS_REPORT_LISTING_COUNT == 0:
+ cls.logger.info('At %s listing %d...', desc, i)
+ yield _BuildTmpOutputLine(blr)
+
+
+def _BuildTmpOutputLine(blr):
+ """Builds line to output to temp file for given BucketListingRef.
+
+ Args:
+ blr: The BucketListingRef.
+
+ Returns:
+ The output line, formatted as quote_plus(URL)<sp>size<sp>crc32c<sp>md5
+ where crc32c will only be present for GCS URLs, and md5 will only be
+    present for cloud URLs that aren't composite objects. A missing field is
+    populated with '-'.
+ """
+ crc32c = _NA
+ md5 = _NA
+ url = blr.storage_url
+ if url.IsFileUrl():
+ size = os.path.getsize(url.object_name)
+ elif url.IsCloudUrl():
+ size = blr.root_object.size
+ crc32c = blr.root_object.crc32c or _NA
+ md5 = blr.root_object.md5Hash or _NA
+ else:
+ raise CommandException('Got unexpected URL type (%s)' % url.scheme)
+ return '%s %d %s %s\n' % (
+ urllib.quote_plus(url.url_string.encode(UTF8)), size, crc32c, md5)
+
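To make the format concrete, a listing line for a hypothetical 1024-byte composite GCS object (which carries a CRC32c but no MD5) might look like the following, with the hash value invented for illustration:

  gs%3A%2F%2Fmybucket%2Fdata%2Ffile.txt 1024 4waSgw== -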
+
+# pylint: disable=bare-except
+def _BatchSort(in_iter, out_file):
+ """Sorts input lines from in_iter and outputs to out_file.
+
+  Sorts in batches as input arrives, so input file does not need to be loaded
+ into memory all at once. Derived from Python Recipe 466302: Sorting big
+ files the Python 2.4 way by Nicolas Lehuen.
+
+  Sorted format is per _BuildTmpOutputLine. We're sorting on the entire line
+ when we could just sort on the first record (URL); but the sort order is
+ identical either way.
+
+ Args:
+ in_iter: Input iterator.
+ out_file: Output file.
+ """
+  # Note: If chunk_files gets very large we can run out of open FDs. See .boto
+  # file comments about rsync_buffer_lines. If increasing rsync_buffer_lines
+ # doesn't suffice (e.g., for someone synchronizing with a really large
+ # bucket), an option would be to make gsutil merge in passes, never
+ # opening all chunk files simultaneously.
+ buffer_size = config.getint('GSUtil', 'rsync_buffer_lines', 32000)
+ chunk_files = []
+ try:
+ while True:
+ current_chunk = sorted(islice(in_iter, buffer_size))
+ if not current_chunk:
+ break
+ output_chunk = io.open('%s-%06i' % (out_file.name, len(chunk_files)),
+ mode='w+', encoding=UTF8)
+ chunk_files.append(output_chunk)
+ output_chunk.writelines(unicode(''.join(current_chunk)))
+ output_chunk.flush()
+ output_chunk.seek(0)
+ out_file.writelines(heapq.merge(*chunk_files))
+ except IOError as e:
+ if e.errno == errno.EMFILE:
+ raise CommandException('\n'.join(textwrap.wrap(
+ 'Synchronization failed because too many open file handles were '
+ 'needed while building synchronization state. Please see the '
+          'comments about rsync_buffer_lines in your .boto config file for a '
+ 'possible way to address this problem.')))
+ raise
+ finally:
+ for chunk_file in chunk_files:
+ try:
+ chunk_file.close()
+ os.remove(chunk_file.name)
+ except:
+ pass
+
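The rsync_buffer_lines setting referenced in the comments above lives in the [GSUtil] section of the boto configuration file; raising it trades memory for fewer chunk files, for example (value illustrative):

  [GSUtil]
  rsync_buffer_lines = 64000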
+
+class _DiffIterator(object):
+ """Iterator yielding sequence of _DiffToApply objects."""
+
+ def __init__(self, command_obj, base_src_url, base_dst_url):
+ self.command_obj = command_obj
+ self.compute_checksums = command_obj.compute_checksums
+ self.delete_extras = command_obj.delete_extras
+ self.recursion_requested = command_obj.recursion_requested
+ self.logger = self.command_obj.logger
+ self.base_src_url = base_src_url
+ self.base_dst_url = base_dst_url
+ self.logger.info('Building synchronization state...')
+
+ (src_fh, self.sorted_list_src_file_name) = tempfile.mkstemp(
+ prefix='gsutil-rsync-src-')
+ (dst_fh, self.sorted_list_dst_file_name) = tempfile.mkstemp(
+ prefix='gsutil-rsync-dst-')
+ # Close the file handles; the file will be opened in write mode by
+ # _ListUrlRootFunc.
+ os.close(src_fh)
+ os.close(dst_fh)
+
+    # Build sorted lists of src and dst URLs in parallel. To do this, pass args
+ # to _ListUrlRootFunc as tuple (url_str, out_file_name, desc).
+ args_iter = iter([
+ (self.base_src_url.url_string, self.sorted_list_src_file_name,
+ 'source'),
+ (self.base_dst_url.url_string, self.sorted_list_dst_file_name,
+ 'destination')
+ ])
+    command_obj.Apply(_ListUrlRootFunc, args_iter, _RootListingExceptionHandler,
+ arg_checker=DummyArgChecker,
+ parallel_operations_override=True,
+ fail_on_error=True)
+
+ self.sorted_list_src_file = open(self.sorted_list_src_file_name, 'r')
+ self.sorted_list_dst_file = open(self.sorted_list_dst_file_name, 'r')
+
+    # Wrap iterators in PluralityCheckableIterator so we can check emptiness.
+ self.sorted_src_urls_it = PluralityCheckableIterator(
+ iter(self.sorted_list_src_file))
+ self.sorted_dst_urls_it = PluralityCheckableIterator(
+ iter(self.sorted_list_dst_file))
+
+ # pylint: disable=bare-except
+ def CleanUpTempFiles(self):
+ """Cleans up temp files.
+
+    This function allows the main (RunCommand) function to clean up at end of
+    operation. This is necessary because tempfile.NamedTemporaryFile doesn't
+    allow the created file to be re-opened in read mode on Windows, so we have
+    to use tempfile.mkstemp, which doesn't automatically delete temp files (see
+    https://mail.python.org/pipermail/python-list/2005-December/336958.html).
+ """
+ try:
+ self.sorted_list_src_file.close()
+ self.sorted_list_dst_file.close()
+ for fname in (self.sorted_list_src_file_name,
+ self.sorted_list_dst_file_name):
+ os.unlink(fname)
+ except:
+ pass
+
+ def _ParseTmpFileLine(self, line):
+ """Parses output from _BuildTmpOutputLine.
+
+ Parses into tuple:
+ (URL, size, crc32c, md5)
+ where crc32c and/or md5 can be _NA.
+
+ Args:
+ line: The line to parse.
+
+ Returns:
+ Parsed tuple: (url, size, crc32c, md5)
+ """
+ (encoded_url, size, crc32c, md5) = line.split()
+ return (urllib.unquote_plus(encoded_url).decode(UTF8),
+ int(size), crc32c, md5.strip())
+
+ def _WarnIfMissingCloudHash(self, url_str, crc32c, md5):
+ """Warns if given url_str is a cloud URL and is missing both crc32c
and md5.
+
+ Args:
+ url_str: Destination URL string.
+ crc32c: Destination CRC32c.
+ md5: Destination MD5.
+
+ Returns:
+ True if issued warning.
+ """
+    # One known way this can currently happen is when rsync'ing objects larger
+    # than 5GB from S3 (for which the etag is not an MD5).
+    if (StorageUrlFromString(url_str).IsCloudUrl()
+        and crc32c == _NA and md5 == _NA):
+      self.logger.warn(
+          'Found no hashes to validate %s. Integrity cannot be assured without '
+ 'hashes.', url_str)
+ return True
+ return False
+
+ def _ObjectsMatch(self, src_url_str, src_size, src_crc32c, src_md5,
+ dst_url_str, dst_size, dst_crc32c, dst_md5):
+ """Returns True if src and dst objects are the same.
+
+ Uses size plus whatever checksums are available.
+
+ Args:
+ src_url_str: Source URL string.
+ src_size: Source size
+ src_crc32c: Source CRC32c.
+ src_md5: Source MD5.
+ dst_url_str: Destination URL string.
+ dst_size: Destination size
+ dst_crc32c: Destination CRC32c.
+ dst_md5: Destination MD5.
+
+ Returns:
+ True/False.
+ """
+ # Note: This function is called from __iter__, which is called from the
+ # Command.Apply driver. Thus, all checksum computation will be run in a
+ # single thread, which is good (having multiple threads concurrently
+ # computing checksums would thrash the disk).
+ if src_size != dst_size:
+ return False
+ if self.compute_checksums:
+      (src_crc32c, src_md5, dst_crc32c, dst_md5) = _ComputeNeededFileChecksums(
+          self.logger, src_url_str, src_size, src_crc32c, src_md5, dst_url_str,
+ dst_size, dst_crc32c, dst_md5)
+ if src_md5 != _NA and dst_md5 != _NA:
+      self.logger.debug('Comparing md5 for %s and %s', src_url_str, dst_url_str)
+ return src_md5 == dst_md5
+ if src_crc32c != _NA and dst_crc32c != _NA:
+ self.logger.debug(
+ 'Comparing crc32c for %s and %s', src_url_str, dst_url_str)
+ return src_crc32c == dst_crc32c
+ if not self._WarnIfMissingCloudHash(src_url_str, src_crc32c, src_md5):
+ self._WarnIfMissingCloudHash(dst_url_str, dst_crc32c, dst_md5)
+ # Without checksums to compare we depend only on basic size comparison.
+ return True
+
+ def __iter__(self):
+ """Iterates over src/dst URLs and produces a _DiffToApply sequence.
+
+ Yields:
+ The _DiffToApply.
+ """
+ # Strip trailing slashes, if any, so we compute tail length against
+    # consistent position regardless of whether trailing slashes were included
+    # or not in URL.
+ base_src_url_len = len(self.base_src_url.url_string.rstrip('/\\'))
+ base_dst_url_len = len(self.base_dst_url.url_string.rstrip('/\\'))
+ src_url_str = dst_url_str = None
+ # Invariant: After each yield, the URLs in src_url_str, dst_url_str,
+ # self.sorted_src_urls_it, and self.sorted_dst_urls_it are not yet
+    # processed. Each time we encounter None in src_url_str or dst_url_str we
+    # populate from the respective iterator, and we reset one or the other
+    # value to None after yielding an action that disposes of that URL.
+ while not self.sorted_src_urls_it.IsEmpty() or src_url_str is not None:
+ if src_url_str is None:
+        (src_url_str, src_size, src_crc32c, src_md5) = self._ParseTmpFileLine(
+            self.sorted_src_urls_it.next())
+        # Skip past base URL and normalize slashes so we can compare across
+        # clouds/file systems (including Windows).
+        src_url_str_to_check = src_url_str[base_src_url_len:].replace('\\', '/')
+        dst_url_str_would_copy_to = copy_helper.ConstructDstUrl(
+            self.base_src_url, StorageUrlFromString(src_url_str), True, True,
+            self.base_dst_url, False, self.recursion_requested).url_string
+ if self.sorted_dst_urls_it.IsEmpty():
+ # We've reached end of dst URLs, so copy src to dst.
+ yield _DiffToApply(
+ src_url_str, dst_url_str_would_copy_to, _DiffAction.COPY)
+ src_url_str = None
+ continue
+ if not dst_url_str:
+ (dst_url_str, dst_size, dst_crc32c, dst_md5) = (
+ self._ParseTmpFileLine(self.sorted_dst_urls_it.next()))
+        # Skip past base URL and normalize slashes so we can compare across
+        # clouds/file systems (including Windows).
+        dst_url_str_to_check = dst_url_str[base_dst_url_len:].replace('\\', '/')
+
+ if src_url_str_to_check < dst_url_str_to_check:
+        # There's no dst object corresponding to src object, so copy src to dst.
+ yield _DiffToApply(
+ src_url_str, dst_url_str_would_copy_to, _DiffAction.COPY)
+ src_url_str = None
+ elif src_url_str_to_check > dst_url_str_to_check:
+        # dst object without a corresponding src object, so remove dst if -d
+ # option was specified.
+ if self.delete_extras:
+ yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
+ dst_url_str = None
+ else:
+        # There is a dst object corresponding to src object, so check if objects
+ # match.
+ if self._ObjectsMatch(
+ src_url_str, src_size, src_crc32c, src_md5,
+ dst_url_str, dst_size, dst_crc32c, dst_md5):
+ # Continue iterating without yielding a _DiffToApply.
+ src_url_str = None
+ dst_url_str = None
+ else:
+ yield _DiffToApply(src_url_str, dst_url_str, _DiffAction.COPY)
+ dst_url_str = None
+
+    # If -d option specified any files/objects left in dst iteration should be
+ # removed.
+ if not self.delete_extras:
+ return
+ if dst_url_str:
+ yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
+ dst_url_str = None
+ for line in self.sorted_dst_urls_it:
+ (dst_url_str, _, _, _) = self._ParseTmpFileLine(line)
+ yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
+
+
+def _RsyncFunc(cls, diff_to_apply, thread_state=None):
+ """Worker function for performing the actual copy and remove
operations."""
+ gsutil_api = GetCloudApiInstance(cls, thread_state=thread_state)
+ dst_url_str = diff_to_apply.dst_url_str
+ dst_url = StorageUrlFromString(dst_url_str)
+ if diff_to_apply.diff_action == _DiffAction.REMOVE:
+ if cls.dryrun:
+ cls.logger.info('Would remove %s', dst_url)
+ else:
+ cls.logger.info('Removing %s', dst_url)
+ if dst_url.IsFileUrl():
+ os.unlink(dst_url.object_name)
+ else:
+ try:
+ gsutil_api.DeleteObject(
+ dst_url.bucket_name, dst_url.object_name,
+ generation=dst_url.generation, provider=dst_url.scheme)
+ except NotFoundException:
+ # If the object happened to be deleted by an external process,
this
+ # is fine because it moves us closer to the desired state.
+ pass
+ elif diff_to_apply.diff_action == _DiffAction.COPY:
+ src_url_str = diff_to_apply.src_url_str
+ src_url = StorageUrlFromString(src_url_str)
+ if cls.dryrun:
+ cls.logger.info('Would copy %s to %s', src_url, dst_url)
+ else:
+      copy_helper.PerformCopy(cls.logger, src_url, dst_url, gsutil_api, cls,
+ _RsyncExceptionHandler,
+ headers=cls.headers)
+ else:
+ raise CommandException('Got unexpected DiffAction (%d)'
+ % diff_to_apply.diff_action)
+
+
+def _RootListingExceptionHandler(cls, e):
+ """Simple exception handler for exceptions during listing URLs to
sync."""
+ cls.logger.error(str(e))
+
+
+def _RsyncExceptionHandler(cls, e):
+ """Simple exception handler to allow post-completion status."""
+ cls.logger.error(str(e))
+ cls.op_failure_count += 1
+ cls.logger.debug('\n\nEncountered exception while syncing:\n%s\n',
+ traceback.format_exc())
+
+
+class RsyncCommand(Command):
+ """Implementation of gsutil rsync command."""
+
+ # Command specification. See base class for documentation.
+ command_spec = Command.CreateCommandSpec(
+ 'rsync',
+ command_name_aliases=[],
+ min_args=2,
+ max_args=2,
+ supported_sub_args='cCdenprR',
+ file_url_ok=True,
+ provider_url_ok=False,
+ urls_start_arg=0,
+ gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
+ gs_default_api=ApiSelector.JSON,
+ )
+ # Help specification. See help_provider.py for documentation.
+ help_spec = Command.HelpSpec(
+ help_name='rsync',
+ help_name_aliases=['sync', 'synchronize'],
+ help_type='command_help',
+      help_one_line_summary='Synchronize content of two buckets/directories',
+ help_text=_DETAILED_HELP_TEXT,
+ subcommand_help_text={},
+ )
+ total_bytes_transferred = 0
+
+ def _InsistContainer(self, url_str):
+ """Sanity checks that URL names an existing container.
+
+ Args:
+ url_str: URL string to check.
+
+ Returns:
+ URL for checked string.
+
+ Raises:
+ CommandException if url_str doesn't name an existing container.
+ """
+ (url, have_existing_container) = (
+        copy_helper.ExpandUrlToSingleBlr(url_str, self.gsutil_api, self.debug,
+ self.project_id))
+ if not have_existing_container:
+ raise CommandException(
+ 'arg (%s) does not name a directory, bucket, or bucket subdir.'
+ % url_str)
+ return url
+
+ def RunCommand(self):
+ """Command entry point for the rsync command."""
+ self._ParseOpts()
+ if self.compute_checksums and not UsingCrcmodExtension(crcmod):
+ self.logger.warn(SLOW_CRCMOD_WARNING)
+
+ src_url = self._InsistContainer(self.args[0])
+ dst_url = self._InsistContainer(self.args[1])
+
+ # Tracks if any copy or rm operations failed.
+ self.op_failure_count = 0
+
+ # List of attributes to share/manage across multiple processes in
+ # parallel (-m) mode.
+ shared_attrs = ['op_failure_count']
+
+ # Perform sync requests in parallel (-m) mode, if requested, using
+ # configured number of parallel processes and threads. Otherwise,
+ # perform requests with sequential function calls in current process.
+ diff_iterator = _DiffIterator(self, src_url, dst_url)
+ self.logger.info('Starting synchronization')
+ try:
+ self.Apply(_RsyncFunc, diff_iterator, _RsyncExceptionHandler,
+ shared_attrs, arg_checker=_DiffToApplyArgChecker,
+ fail_on_error=True)
+ finally:
+ diff_iterator.CleanUpTempFiles()
+
+ if self.op_failure_count:
+      plural_str = 's' if self.op_failure_count > 1 else ''
+ raise CommandException(
+ '%d file%s/object%s could not be copied/removed.' %
+ (self.op_failure_count, plural_str, plural_str))
+
+ def _ParseOpts(self):
+    # exclude_symlinks is handled by Command parent class, so save in Command
+ # state rather than CopyHelperOpts.
+ self.exclude_symlinks = False
+    # continue_on_error is handled by Command parent class, so save in Command
+ # state rather than CopyHelperOpts.
+ self.continue_on_error = False
+ self.delete_extras = False
+ preserve_acl = False
+ self.compute_checksums = False
+ self.dryrun = False
+ # self.recursion_requested is initialized in command.py (so it can be
+ # checked in parent class for all commands).
+
+ if self.sub_opts:
+ for o, _ in self.sub_opts:
+ if o == '-c':
+ self.compute_checksums = True
+        # Note: In gsutil cp command this is specified using -c but here we use
+        # -C so we can use -c for checksum arg (to be consistent with Unix rsync
+        # command options).
+ elif o == '-C':
+ self.continue_on_error = True
+ elif o == '-d':
+ self.delete_extras = True
+ elif o == '-e':
+ self.exclude_symlinks = True
+ elif o == '-n':
+ self.dryrun = True
+ elif o == '-p':
+ preserve_acl = True
+ elif o == '-r' or o == '-R':
+ self.recursion_requested = True
+ return CreateCopyHelperOpts(preserve_acl=preserve_acl)
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/commands/signurl.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,304 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Implementation of Url Signing workflow.
+
+see: https://developers.google.com/storage/docs/accesscontrol#Signed-URLs
+"""
+
+from __future__ import absolute_import
+
+import base64
+import calendar
+from datetime import datetime
+from datetime import timedelta
+import getpass
+import re
+import time
+import urllib
+
+import httplib2
+
+from gslib.command import Command
+from gslib.cs_api_map import ApiSelector
+from gslib.exception import CommandException
+from gslib.storage_url import ContainsWildcard
+from gslib.storage_url import StorageUrlFromString
+from gslib.util import GetNewHttp
+from gslib.util import NO_MAX
+
+try:
+ # Check for openssl.
+ # pylint: disable=C6204
+ from OpenSSL.crypto import load_pkcs12
+ from OpenSSL.crypto import sign
+ HAVE_OPENSSL = True
+except ImportError:
+ load_pkcs12 = None
+ sign = None
+ HAVE_OPENSSL = False
+
+_DETAILED_HELP_TEXT = ("""
+<B>SYNOPSIS</B>
+ gsutil signurl pkcs12-file url...
+
+
+<B>DESCRIPTION</B>
+ The signurl command will generate signed urls that can be used to access
+  the specified objects without authentication for a specific period of time.
+
+ Please see the `Signed URLs documentation
+ https://developers.google.com/storage/docs/accesscontrol#Signed-URLs` for
+ background about signed URLs.
+
+  Multiple gs:// urls may be provided and may contain wildcards. A signed url
+ will be produced for each provided url, authorized
+ for the specified HTTP method and valid for the given duration.
+
+  The signurl command uses the private key for a service account (the
+  '<private-key-file>' argument) to generate the cryptographic
+  signature for the generated URL. The private key file must be in PKCS12
+  format. The signurl command will prompt for the passphrase used to protect
+  the private key file (default 'notasecret'). For more information
+  regarding generating a private key for use with the signurl command please
+  see the `Authentication documentation
+  https://developers.google.com/storage/docs/authentication#generating-a-private-key`.
+
+  Note: Unlike the gsutil ls command, the signurl command does not support
+  operations on sub-directories. For example, if you run the command:
+
+    gsutil signurl <private-key-file> gs://some-bucket/some-object/
+
+  gsutil will look up information about the object "some-object/" (with a
+  trailing slash) inside bucket "some-bucket", as opposed to operating on
+  objects nested under gs://some-bucket/some-object. Unless you actually
+  have an object with that name, the operation will fail.
+
+<B>OPTIONS</B>
+ -m Specifies the HTTP method to be authorized for use
+ with the signed url, default is GET.
+
+ -d Specifies the duration that the signed url should be valid
+ for, default duration is 1 hour.
+
+ Times may be specified with no suffix (default hours), or
+ with s = seconds, m = minutes, h = hours, d = days.
+
+ This option may be specified multiple times, in which case
+ the duration the link remains valid is the sum of all the
+ duration options.
+
+ -c Specifies the content type for which the signed url is
+                  valid.
+
+ -p Specify the keystore password instead of prompting.
+
+<B>USAGE</B>
+
+ Create a signed url for downloading an object valid for 10 minutes:
+
+ gsutil signurl <private-key-file> -d 10m gs://<bucket>/<object>
+
+ Create a signed url for uploading a plain text file via HTTP PUT:
+
+    gsutil signurl <private-key-file> -m PUT -d 1h -c text/plain gs://<bucket>/<obj>
+
+ To construct a signed URL that allows anyone in possession of
+ the URL to PUT to the specified bucket for one day, creating
+ any object of Content-Type image/jpg, run:
+
+    gsutil signurl <private-key-file> -m PUT -d 1d -c image/jpg gs://<bucket>/<obj>
+
+
+""")
+
+
+def _DurationToTimeDelta(duration):
+ r"""Parses the given duration and returns an equivalent timedelta."""
+
+ match = re.match(r'^(\d+)([dDhHmMsS])?$', duration)
+ if not match:
+ raise CommandException('Unable to parse duration string')
+
+ duration, modifier = match.groups('h')
+ duration = int(duration)
+ modifier = modifier.lower()
+
+ if modifier == 'd':
+ ret = timedelta(days=duration)
+ elif modifier == 'h':
+ ret = timedelta(hours=duration)
+ elif modifier == 'm':
+ ret = timedelta(minutes=duration)
+ elif modifier == 's':
+ ret = timedelta(seconds=duration)
+
+ return ret
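
For illustration, a minimal sketch of the duration parsing above, including the
summing behavior of repeated -d options described in the help text (and applied
in _ParseSubOpts below); it assumes _DurationToTimeDelta is available from this
module:

    from datetime import timedelta

    assert _DurationToTimeDelta('90m') == timedelta(minutes=90)
    assert _DurationToTimeDelta('2') == timedelta(hours=2)   # no suffix = hours
    # Repeated options such as '-d 1d -d 12h' are summed:
    assert (_DurationToTimeDelta('1d') + _DurationToTimeDelta('12h') ==
            timedelta(days=1, hours=12))
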
+
+
+def _GenSignedUrl(key, client_id, method, md5,
+ content_type, expiration, gcs_path):
+ """Construct a string to sign with the provided key and returns \
+ the complete url."""
+
+ tosign = ('{0}\n{1}\n{2}\n{3}\n/{4}'
+ .format(method, md5, content_type,
+ expiration, gcs_path))
+ signature = base64.b64encode(sign(key, tosign, 'RSA-SHA256'))
+
+ final_url = ('https://storage.googleapis.com/{0}?'
+ 'GoogleAccessId={1}&Expires={2}&Signature={3}'
+ .format(gcs_path, client_id, expiration,
+ urllib.quote_plus(str(signature))))
+
+ return final_url
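
For reference, a minimal illustration of the string-to-sign layout produced by
the format string above, using a made-up expiration timestamp and object path
(no MD5 or Content-Type):

    tosign = '{0}\n{1}\n{2}\n{3}\n/{4}'.format(
        'GET', '', '', 1414995508, 'some-bucket/some-object')
    assert tosign == 'GET\n\n\n1414995508\n/some-bucket/some-object'

The base64-encoded RSA-SHA256 signature of that string is then URL-encoded into
the Signature query parameter of the final URL.
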
+
+
+def _ReadKeystore(ks_contents, passwd):
+ ks = load_pkcs12(ks_contents, passwd)
+ client_id = (ks.get_certificate()
+ .get_subject()
+ .CN.replace('.apps.googleusercontent.com',
+ '@developer.gserviceaccount.com'))
+
+ return ks, client_id
+
+
+class UrlSignCommand(Command):
+ """Implementation of gsutil url_sign command."""
+
+ # Command specification. See base class for documentation.
+ command_spec = Command.CreateCommandSpec(
+ 'signurl',
+ command_name_aliases=['signedurl', 'queryauth'],
+ min_args=2,
+ max_args=NO_MAX,
+ supported_sub_args='m:d:c:p:',
+ file_url_ok=False,
+ provider_url_ok=False,
+ urls_start_arg=1,
+ gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
+ gs_default_api=ApiSelector.JSON,
+ )
+ # Help specification. See help_provider.py for documentation.
+ help_spec = Command.HelpSpec(
+ help_name='signurl',
+ help_name_aliases=['signedurl', 'queryauth'],
+ help_type='command_help',
+ help_one_line_summary='Create a signed url',
+ help_text=_DETAILED_HELP_TEXT,
+ subcommand_help_text={},
+ )
+
+ def _ParseSubOpts(self):
+ # Default argument values
+ delta = None
+ method = 'GET'
+ content_type = ''
+ passwd = None
+
+ for o, v in self.sub_opts:
+ if o == '-d':
+ if delta is not None:
+ delta += _DurationToTimeDelta(v)
+ else:
+ delta = _DurationToTimeDelta(v)
+ elif o == '-m':
+ method = v
+ elif o == '-c':
+ content_type = v
+ elif o == '-p':
+ passwd = v
+
+ if delta is None:
+ delta = timedelta(hours=1)
+
+    expiration = calendar.timegm((datetime.utcnow() + delta).utctimetuple())
+    if method not in ['GET', 'PUT', 'DELETE', 'HEAD']:
+      raise CommandException('HTTP method must be one of [GET|HEAD|PUT|DELETE]')
+
+ return method, expiration, content_type, passwd
+
+ def _CheckClientCanRead(self, key, client_id, gcs_path):
+ """Performs a head request against a signed url to check for read
access."""
+
+ signed_url = _GenSignedUrl(key, client_id,
+ 'HEAD', '', '',
+ int(time.time()) + 10,
+ gcs_path)
+ h = GetNewHttp()
+ try:
+ response, _ = h.request(signed_url, 'HEAD')
+
+ return response.status == 200
+ except httplib2.HttpLib2Error as e:
+      raise CommandException('Unexpected error while querying '
+ 'object readability ({0})'
+ .format(e.message))
+
+ def _EnumerateStorageUrls(self, in_urls):
+ ret = []
+
+ for url_str in in_urls:
+ if ContainsWildcard(url_str):
+        ret.extend([blr.storage_url for blr in
+                    self.WildcardIterator(url_str)])
+ else:
+ ret.append(StorageUrlFromString(url_str))
+
+ return ret
+
+ def RunCommand(self):
+ """Command entry point for signurl command."""
+ if not HAVE_OPENSSL:
+ raise CommandException(
+ 'The signurl command requires the pyopenssl library (try pip '
+ 'install pyopenssl or easy_install pyopenssl)')
+
+ method, expiration, content_type, passwd = self._ParseSubOpts()
+ storage_urls = self._EnumerateStorageUrls(self.args[1:])
+
+ if not passwd:
+ passwd = getpass.getpass('Keystore password:')
+
+ ks, client_id = _ReadKeystore(open(self.args[0], 'rb').read(), passwd)
+
+ print 'URL\tHTTP Method\tExpiration\tSigned URL'
+ for url in storage_urls:
+ if url.scheme != 'gs':
+        raise CommandException('Can only create signed urls from gs:// urls')
+ if url.IsBucket():
+ gcs_path = url.bucket_name
+ else:
+ gcs_path = '{0}/{1}'.format(url.bucket_name, url.object_name)
+
+ final_url = _GenSignedUrl(ks.get_privatekey(), client_id,
+ method, '', content_type, expiration,
+ gcs_path)
+
+ expiration_dt = datetime.fromtimestamp(expiration)
+
+ print '{0}\t{1}\t{2}\t{3}'.format(url, method,
+ (expiration_dt
+ .strftime('%Y-%m-%d %H:%M:%S')),
+ final_url)
+ if (method != 'PUT' and
+ not self._CheckClientCanRead(ks.get_privatekey(),
+ client_id,
+ gcs_path)):
+ self.logger.warn(
+            '%s does not have permissions on %s, using this link will likely '
+            'result in a 403 error until at least READ permissions are granted',
+            client_id, url)
+
+ return 0
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/copy_helper.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,2776 @@
+# -*- coding: utf-8 -*-
+# Copyright 2011 Google Inc. All Rights Reserved.
+# Copyright 2011, Nexenta Systems Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Helper functions for copy functionality."""
+
+from __future__ import absolute_import
+
+import base64
+from collections import namedtuple
+import csv
+import datetime
+import errno
+import gzip
+import hashlib
+from hashlib import md5
+import json
+import logging
+import mimetypes
+import os
+import random
+import re
+import shutil
+import stat
+import subprocess
+import sys
+import tempfile
+import textwrap
+import time
+import traceback
+
+from boto import config
+import crcmod
+
+import gslib
+from gslib.cloud_api import ArgumentException
+from gslib.cloud_api import CloudApi
+from gslib.cloud_api import NotFoundException
+from gslib.cloud_api import PreconditionException
+from gslib.cloud_api import Preconditions
+from gslib.cloud_api import ResumableDownloadException
+from gslib.cloud_api import ResumableUploadAbortException
+from gslib.cloud_api import ResumableUploadException
+from gslib.cloud_api_helper import GetDownloadSerializationDict
+from gslib.commands.compose import MAX_COMPOSE_ARITY
+from gslib.commands.config import DEFAULT_PARALLEL_COMPOSITE_UPLOAD_COMPONENT_SIZE
+from gslib.commands.config import DEFAULT_PARALLEL_COMPOSITE_UPLOAD_THRESHOLD
+from gslib.cs_api_map import ApiSelector
+from gslib.daisy_chain_wrapper import DaisyChainWrapper
+from gslib.exception import CommandException
+from gslib.file_part import FilePart
+from gslib.hashing_helper import Base64EncodeHash
+from gslib.hashing_helper import CalculateB64EncodedMd5FromContents
+from gslib.hashing_helper import CalculateHashesFromContents
+from gslib.hashing_helper import GetDownloadHashAlgs
+from gslib.hashing_helper import GetUploadHashAlgs
+from gslib.hashing_helper import HashingFileUploadWrapper
+from gslib.progress_callback import ConstructAnnounceText
+from gslib.progress_callback import FileProgressCallbackHandler
+from gslib.progress_callback import ProgressCallbackWithBackoff
+from gslib.storage_url import ContainsWildcard
+from gslib.storage_url import StorageUrlFromString
+from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
+from gslib.translation_helper import AddS3MarkerAclToObjectMetadata
+from gslib.translation_helper import CopyObjectMetadata
+from gslib.translation_helper import DEFAULT_CONTENT_TYPE
+from gslib.translation_helper import GenerationFromUrlAndString
+from gslib.translation_helper import ObjectMetadataFromHeaders
+from gslib.translation_helper import PreconditionsFromHeaders
+from gslib.translation_helper import S3MarkerAclFromObjectMetadata
+from gslib.util import CreateLock
+from gslib.util import CreateTrackerDirIfNeeded
+from gslib.util import DEFAULT_FILE_BUFFER_SIZE
+from gslib.util import GetCloudApiInstance
+from gslib.util import GetFileSize
+from gslib.util import GetStreamFromFileUrl
+from gslib.util import HumanReadableToBytes
+from gslib.util import IS_WINDOWS
+from gslib.util import IsCloudSubdirPlaceholder
+from gslib.util import MakeHumanReadable
+from gslib.util import MIN_SIZE_COMPUTE_LOGGING
+from gslib.util import ResumableThreshold
+from gslib.util import TEN_MB
+from gslib.util import UTF8
+from gslib.wildcard_iterator import CreateWildcardIterator
+
+# pylint: disable=g-import-not-at-top
+if IS_WINDOWS:
+ import msvcrt
+ from ctypes import c_int
+ from ctypes import c_uint64
+ from ctypes import c_char_p
+ from ctypes import c_wchar_p
+ from ctypes import windll
+ from ctypes import POINTER
+ from ctypes import WINFUNCTYPE
+ from ctypes import WinError
+
+# Declare copy_helper_opts as a global because namedtuple isn't aware of
+# assigning to a class member (which breaks pickling done by multiprocessing).
+# For details see
+# http://stackoverflow.com/questions/16377215/how-to-pickle-a-namedtuple-instance-correctly
+# Similarly can't pickle logger.
+# pylint: disable=global-at-module-level
+global global_copy_helper_opts, global_logger
+
+PARALLEL_UPLOAD_TEMP_NAMESPACE = (
+    u'/gsutil/tmp/parallel_composite_uploads/for_details_see/gsutil_help_cp/')
+
+PARALLEL_UPLOAD_STATIC_SALT = u"""
+PARALLEL_UPLOAD_SALT_TO_PREVENT_COLLISIONS.
+The theory is that no user will have prepended this to the front of
+one of their object names and then done an MD5 hash of the name, and
+then prepended PARALLEL_UPLOAD_TEMP_NAMESPACE to the front of their object
+name. Note that there will be no problems with object name length since we
+hash the original name.
+"""
+
+TRACKER_FILE_UNWRITABLE_EXCEPTION_TEXT = (
+ 'Couldn\'t write tracker file (%s): %s. This can happen if gsutil is '
+    'configured to save tracker files to an unwritable directory.')
+
+# When uploading a file, get the following fields in the response for
+# filling in command output and manifests.
+UPLOAD_RETURN_FIELDS = ['crc32c', 'generation', 'md5Hash', 'size']
+
+# This tuple is used only to encapsulate the arguments needed for
+# command.Apply() in the parallel composite upload case.
+# Note that content_type is used instead of a full apitools Object() because
+# apitools objects are not picklable.
+# filename: String name of file.
+# file_start: start byte of file (may be in the middle of a file for partitioned
+# files).
+# file_length: length of upload (may not be the entire length of a file for
+# partitioned files).
+# src_url: FileUrl describing the source file.
+# dst_url: CloudUrl describing the destination component file.
+# canned_acl: canned_acl to apply to the uploaded file/component.
+# content_type: content-type for final object, used for setting content-type
+# of components and final object.
+# tracker_file: tracker file for this component.
+# tracker_file_lock: tracker file lock for tracker file(s).
+PerformParallelUploadFileToObjectArgs = namedtuple(
+ 'PerformParallelUploadFileToObjectArgs',
+ 'filename file_start file_length src_url dst_url canned_acl '
+ 'content_type tracker_file tracker_file_lock')
+
+ObjectFromTracker = namedtuple('ObjectFromTracker',
+ 'object_name generation')
+
+# The maximum length of a file name can vary wildly between different
+# operating systems, so we always ensure that tracker files are less
+# than 100 characters in order to avoid any such issues.
+MAX_TRACKER_FILE_NAME_LENGTH = 100
+
+# TODO: Refactor this file to be less cumbersome. In particular, some of the
+# different paths (e.g., uploading a file to an object vs. downloading an
+# object to a file) could be split into separate files.
+
+# Chunk size to use while zipping/unzipping gzip files.
+GZIP_CHUNK_SIZE = 8192
+
+PARALLEL_COMPOSITE_SUGGESTION_THRESHOLD = 150 * 1024 * 1024
+
+suggested_parallel_composites = False
+
+
+class TrackerFileType(object):
+ UPLOAD = 'upload'
+ DOWNLOAD = 'download'
+ PARALLEL_UPLOAD = 'parallel_upload'
+
+
+def _RmExceptionHandler(cls, e):
+ """Simple exception handler to allow post-completion status."""
+ cls.logger.error(str(e))
+
+
+def _ParallelUploadCopyExceptionHandler(cls, e):
+ """Simple exception handler to allow post-completion status."""
+ cls.logger.error(str(e))
+ cls.op_failure_count += 1
+ cls.logger.debug('\n\nEncountered exception while copying:\n%s\n',
+ traceback.format_exc())
+
+
+def _PerformParallelUploadFileToObject(cls, args, thread_state=None):
+ """Function argument to Apply for performing parallel composite uploads.
+
+ Args:
+ cls: Calling Command class.
+    args: PerformParallelUploadFileToObjectArgs tuple describing the target.
+ thread_state: gsutil Cloud API instance to use for the operation.
+
+ Returns:
+ StorageUrl representing a successfully uploaded component.
+ """
+ fp = FilePart(args.filename, args.file_start, args.file_length)
+ gsutil_api = GetCloudApiInstance(cls, thread_state=thread_state)
+ with fp:
+    # We take many precautions with the component names that make collisions
+    # effectively impossible. Specifying preconditions will just allow us to
+ # reach a state in which uploads will always fail on retries.
+ preconditions = None
+
+ # Fill in content type if one was provided.
+ dst_object_metadata = apitools_messages.Object(
+ name=args.dst_url.object_name,
+ bucket=args.dst_url.bucket_name,
+ contentType=args.content_type)
+
+ try:
+ if global_copy_helper_opts.canned_acl:
+ # No canned ACL support in JSON, force XML API to be used for
+ # upload/copy operations.
+ orig_prefer_api = gsutil_api.prefer_api
+ gsutil_api.prefer_api = ApiSelector.XML
+ ret = _UploadFileToObject(args.src_url, fp, args.file_length,
+ args.dst_url, dst_object_metadata,
+ preconditions, gsutil_api, cls.logger, cls,
+ _ParallelUploadCopyExceptionHandler,
+ gzip_exts=None, allow_splitting=False)
+ finally:
+ if global_copy_helper_opts.canned_acl:
+ gsutil_api.prefer_api = orig_prefer_api
+
+ component = ret[2]
+ _AppendComponentTrackerToParallelUploadTrackerFile(
+ args.tracker_file, component, args.tracker_file_lock)
+ return ret
+
+
+CopyHelperOpts = namedtuple('CopyHelperOpts', [
+ 'perform_mv',
+ 'no_clobber',
+ 'daisy_chain',
+ 'read_args_from_stdin',
+ 'print_ver',
+ 'use_manifest',
+ 'preserve_acl',
+ 'canned_acl',
+ 'halt_at_byte'])
+
+
+# pylint: disable=global-variable-undefined
+def CreateCopyHelperOpts(perform_mv=False, no_clobber=False, daisy_chain=False,
+ read_args_from_stdin=False, print_ver=False,
+ use_manifest=False, preserve_acl=False,
+ canned_acl=None, halt_at_byte=None):
+ """Creates CopyHelperOpts for passing options to CopyHelper."""
+ # We create a tuple with union of options needed by CopyHelper and any
+  # copy-related functionality in CpCommand, RsyncCommand, or Command class.
+ global global_copy_helper_opts
+ global_copy_helper_opts = CopyHelperOpts(
+ perform_mv=perform_mv,
+ no_clobber=no_clobber,
+ daisy_chain=daisy_chain,
+ read_args_from_stdin=read_args_from_stdin,
+ print_ver=print_ver,
+ use_manifest=use_manifest,
+ preserve_acl=preserve_acl,
+ canned_acl=canned_acl,
+ halt_at_byte=halt_at_byte)
+ return global_copy_helper_opts
+
+
+# pylint: disable=global-variable-undefined
+# pylint: disable=global-variable-not-assigned
+def GetCopyHelperOpts():
+ """Returns namedtuple holding CopyHelper options."""
+ global global_copy_helper_opts
+ return global_copy_helper_opts
+
+
+def GetTrackerFilePath(dst_url, tracker_file_type, api_selector, src_url=None):
+ """Gets the tracker file name described by the arguments.
+
+ Public for testing purposes.
+
+ Args:
+ dst_url: Destination URL for tracker file.
+ tracker_file_type: TrackerFileType for this operation.
+ api_selector: API to use for this operation.
+ src_url: Source URL for the source file name for parallel uploads.
+
+ Returns:
+ File path to tracker file.
+ """
+ resumable_tracker_dir = CreateTrackerDirIfNeeded()
+ if tracker_file_type == TrackerFileType.UPLOAD:
+ # Encode the dest bucket and object name into the tracker file name.
+ res_tracker_file_name = (
+ re.sub('[/\\\\]', '_', 'resumable_upload__%s__%s__%s.url' %
+ (dst_url.bucket_name, dst_url.object_name, api_selector)))
+ elif tracker_file_type == TrackerFileType.DOWNLOAD:
+ # Encode the fully-qualified dest file name into the tracker file name.
+ res_tracker_file_name = (
+ re.sub('[/\\\\]', '_', 'resumable_download__%s__%s.etag' %
+ (os.path.realpath(dst_url.object_name), api_selector)))
+ elif tracker_file_type == TrackerFileType.PARALLEL_UPLOAD:
+    # Encode the dest bucket and object names as well as the source file name
+ # into the tracker file name.
+ res_tracker_file_name = (
+ re.sub('[/\\\\]', '_', 'parallel_upload__%s__%s__%s__%s.url' %
+ (dst_url.bucket_name, dst_url.object_name,
+ src_url, api_selector)))
+
+ res_tracker_file_name = _HashFilename(res_tracker_file_name)
+ tracker_file_name = '%s_%s' % (str(tracker_file_type).lower(),
+ res_tracker_file_name)
+ tracker_file_path = '%s%s%s' % (resumable_tracker_dir, os.sep,
+ tracker_file_name)
+ assert len(tracker_file_name) < MAX_TRACKER_FILE_NAME_LENGTH
+ return tracker_file_path
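
As a minimal sketch of the naming scheme above (bucket and object names are
made up), the pre-hash portion of an upload tracker file name is formed like
this before being passed to _HashFilename and prefixed with the tracker type:

    import re

    bucket, obj, api = 'my-bucket', 'backups/data.bin', 'JSON'
    name = re.sub('[/\\\\]', '_',
                  'resumable_upload__%s__%s__%s.url' % (bucket, obj, api))
    assert name == 'resumable_upload__my-bucket__backups_data.bin__JSON.url'
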
+
+
+def _SelectDownloadStrategy(src_obj_metadata, dst_url):
+ """Get download strategy based on the source and dest objects.
+
+ Args:
+ src_obj_metadata: Object describing the source object.
+ dst_url: Destination StorageUrl.
+
+ Returns:
+ gsutil Cloud API DownloadStrategy.
+ """
+ dst_is_special = False
+ if dst_url.IsFileUrl():
+    # Check explicitly first because os.stat doesn't work on 'nul' in Windows.
+ if dst_url.object_name == os.devnull:
+ dst_is_special = True
+ try:
+ mode = os.stat(dst_url.object_name).st_mode
+ if stat.S_ISCHR(mode):
+ dst_is_special = True
+ except OSError:
+ pass
+
+ if src_obj_metadata.size >= ResumableThreshold() and not dst_is_special:
+ return CloudApi.DownloadStrategy.RESUMABLE
+ else:
+ return CloudApi.DownloadStrategy.ONE_SHOT
+
+
+def _GetUploadTrackerData(tracker_file_name, logger):
+ """Checks for an upload tracker file and creates one if it does not
exist.
+
+ Args:
+ tracker_file_name: Tracker file name for this upload.
+ logger: for outputting log messages.
+
+ Returns:
+ Serialization data if the tracker file already exists (resume existing
+ upload), None otherwise.
+ """
+ tracker_file = None
+
+ # If we already have a matching tracker file, get the serialization data
+ # so that we can resume the upload.
+ try:
+ tracker_file = open(tracker_file_name, 'r')
+ tracker_data = tracker_file.read()
+ return tracker_data
+ except IOError as e:
+    # Ignore non-existent file (happens first time an upload
+ # is attempted on an object), but warn user for other errors.
+ if e.errno != errno.ENOENT:
+      logger.warn('Couldn\'t read upload tracker file (%s): %s. Restarting '
+ 'upload from scratch.', tracker_file_name, e.strerror)
+ finally:
+ if tracker_file:
+ tracker_file.close()
+
+
+def _ReadOrCreateDownloadTrackerFile(src_obj_metadata, dst_url,
+ api_selector):
+ """Checks for a download tracker file and creates one if it does not
exist.
+
+ Args:
+ src_obj_metadata: Metadata for the source object. Must include
+ etag.
+ dst_url: Destination file StorageUrl.
+ api_selector: API mode to use (for tracker file naming).
+
+ Returns:
+ True if the tracker file already exists (resume existing download),
+ False if we created a new tracker file (new download).
+ """
+ assert src_obj_metadata.etag
+ tracker_file_name = GetTrackerFilePath(
+ dst_url, TrackerFileType.DOWNLOAD, api_selector)
+ tracker_file = None
+
+ # Check to see if we already have a matching tracker file.
+ try:
+ tracker_file = open(tracker_file_name, 'r')
+ etag_value = tracker_file.readline().rstrip('\n')
+ if etag_value == src_obj_metadata.etag:
+ return True
+ except IOError as e:
+ # Ignore non-existent file (happens first time a download
+ # is attempted on an object), but warn user for other errors.
+ if e.errno != errno.ENOENT:
+ print('Couldn\'t read URL tracker file (%s): %s. Restarting '
+ 'download from scratch.' %
+ (tracker_file_name, e.strerror))
+ finally:
+ if tracker_file:
+ tracker_file.close()
+
+ # Otherwise, create a new tracker file and start from scratch.
+ try:
+ with os.fdopen(os.open(tracker_file_name,
+ os.O_WRONLY | os.O_CREAT, 0600), 'w') as tf:
+ tf.write('%s\n' % src_obj_metadata.etag)
+ return False
+ except IOError as e:
+ raise CommandException(TRACKER_FILE_UNWRITABLE_EXCEPTION_TEXT %
+ (tracker_file_name, e.strerror))
+ finally:
+ if tracker_file:
+ tracker_file.close()
+
+
+def _DeleteTrackerFile(tracker_file_name):
+ if tracker_file_name and os.path.exists(tracker_file_name):
+ os.unlink(tracker_file_name)
+
+
+def InsistDstUrlNamesContainer(exp_dst_url, have_existing_dst_container,
+ command_name):
+ """Ensures the destination URL names a container.
+
+ Acceptable containers include directory, bucket, bucket
+ subdir, and non-existent bucket subdir.
+
+ Args:
+ exp_dst_url: Wildcard-expanded destination StorageUrl.
+ have_existing_dst_container: bool indicator of whether exp_dst_url
+ names a container (directory, bucket, or existing bucket subdir).
+ command_name: Name of command making call. May not be the same as the
+      calling class's self.command_name in the case of commands implemented
+ atop other commands (like mv command).
+
+ Raises:
+ CommandException: if the URL being checked does not name a container.
+ """
+ if ((exp_dst_url.IsFileUrl() and not exp_dst_url.IsDirectory()) or
+ (exp_dst_url.IsCloudUrl() and exp_dst_url.IsBucket()
+ and not have_existing_dst_container)):
+    raise CommandException('Destination URL must name a directory, bucket, '
+ 'or bucket\nsubdirectory for the multiple '
+ 'source form of the %s command.' % command_name)
+
+
+def _ShouldTreatDstUrlAsBucketSubDir(have_multiple_srcs, dst_url,
+ have_existing_dest_subdir,
+ src_url_names_container,
+ recursion_requested):
+ """Checks whether dst_url should be treated as a bucket "sub-directory".
+
+ The decision about whether something constitutes a bucket "sub-directory"
+ depends on whether there are multiple sources in this request and whether
+ there is an existing bucket subdirectory. For example, when running the
+ command:
+ gsutil cp file gs://bucket/abc
+ if there's no existing gs://bucket/abc bucket subdirectory we should copy
+ file to the object gs://bucket/abc. In contrast, if
+ there's an existing gs://bucket/abc bucket subdirectory we should copy
+ file to gs://bucket/abc/file. And regardless of whether gs://bucket/abc
+ exists, when running the command:
+ gsutil cp file1 file2 gs://bucket/abc
+ we should copy file1 to gs://bucket/abc/file1 (and similarly for file2).
+  Finally, for recursive copies, if the source is a container then we should
+  copy to a container as the target. For example, when running the command:
+ gsutil cp -r dir1 gs://bucket/dir2
+ we should copy the subtree of dir1 to gs://bucket/dir2.
+
+ Note that we don't disallow naming a bucket "sub-directory" where there's
+ already an object at that URL. For example it's legitimate (albeit
+ confusing) to have an object called gs://bucket/dir and
+ then run the command
+ gsutil cp file1 file2 gs://bucket/dir
+ Doing so will end up with objects gs://bucket/dir, gs://bucket/dir/file1,
+ and gs://bucket/dir/file2.
+
+ Args:
+ have_multiple_srcs: Bool indicator of whether this is a multi-source
+ operation.
+ dst_url: StorageUrl to check.
+ have_existing_dest_subdir: bool indicator whether dest is an existing
+ subdirectory.
+ src_url_names_container: bool indicator of whether the source URL
+ is a container.
+ recursion_requested: True if a recursive operation has been requested.
+
+ Returns:
+ bool indicator.
+ """
+ if have_existing_dest_subdir:
+ return True
+ if dst_url.IsCloudUrl():
+ return (have_multiple_srcs or
+ (src_url_names_container and recursion_requested))
+
+
+def _ShouldTreatDstUrlAsSingleton(have_multiple_srcs,
+ have_existing_dest_subdir, dst_url,
+ recursion_requested):
+ """Checks that dst_url names a single file/object after wildcard
expansion.
+
+ It is possible that an object path might name a bucket sub-directory.
+
+ Args:
+ have_multiple_srcs: Bool indicator of whether this is a multi-source
+ operation.
+ have_existing_dest_subdir: bool indicator whether dest is an existing
+ subdirectory.
+ dst_url: StorageUrl to check.
+ recursion_requested: True if a recursive operation has been requested.
+
+ Returns:
+ bool indicator.
+ """
+ if recursion_requested:
+ return False
+ if dst_url.IsFileUrl():
+ return not dst_url.IsDirectory()
+ else: # dst_url.IsCloudUrl()
+ return (not have_multiple_srcs and
+ not have_existing_dest_subdir and
+ dst_url.IsObject())
+
+
+def ConstructDstUrl(src_url, exp_src_url, src_url_names_container,
+                    have_multiple_srcs, exp_dst_url, have_existing_dest_subdir,
+                    recursion_requested):
+  """Constructs the destination URL for a given exp_src_url/exp_dst_url pair.
+
+ Uses context-dependent naming rules that mimic Linux cp and mv behavior.
+
+ Args:
+ src_url: Source StorageUrl to be copied.
+ exp_src_url: Single StorageUrl from wildcard expansion of src_url.
+    src_url_names_container: True if src_url names a container (including the
+ case of a wildcard-named bucket subdir (like gs://bucket/abc,
+ where gs://bucket/abc/* matched some objects).
+ have_multiple_srcs: True if this is a multi-source request. This can be
+ true if src_url wildcard-expanded to multiple URLs or if there were
+ multiple source URLs in the request.
+ exp_dst_url: the expanded StorageUrl requested for the cp destination.
+      Final written path is constructed from this plus a context-dependent
+ variant of src_url.
+ have_existing_dest_subdir: bool indicator whether dest is an existing
+ subdirectory.
+ recursion_requested: True if a recursive operation has been requested.
+
+ Returns:
+ StorageUrl to use for copy.
+
+ Raises:
+ CommandException if destination object name not specified for
+ source and source is a stream.
+ """
+ if _ShouldTreatDstUrlAsSingleton(
+ have_multiple_srcs, have_existing_dest_subdir, exp_dst_url,
+ recursion_requested):
+ # We're copying one file or object to one file or object.
+ return exp_dst_url
+
+ if exp_src_url.IsFileUrl() and exp_src_url.IsStream():
+ if have_existing_dest_subdir:
+ raise CommandException('Destination object name needed when '
+ 'source is a stream')
+ return exp_dst_url
+
+ if not recursion_requested and not have_multiple_srcs:
+ # We're copying one file or object to a subdirectory. Append final comp
+ # of exp_src_url to exp_dst_url.
+ src_final_comp = exp_src_url.object_name.rpartition(src_url.delim)[-1]
+ return StorageUrlFromString('%s%s%s' % (
+ exp_dst_url.url_string.rstrip(exp_dst_url.delim),
+ exp_dst_url.delim, src_final_comp))
+
+ # Else we're copying multiple sources to a directory, bucket, or a bucket
+ # "sub-directory".
+
+  # Ensure exp_dst_url ends in delim char if we're doing a multi-src copy or
+  # a copy to a directory. (The check for copying to a directory needs
+  # special-case handling so that the command:
+  #   gsutil cp gs://bucket/obj dir
+  # will turn into file://dir/ instead of file://dir -- the latter would cause
+ # the file "dirobj" to be created.)
+ # Note: need to check have_multiple_srcs or src_url.names_container()
+ # because src_url could be a bucket containing a single object, named
+ # as gs://bucket.
+ if ((have_multiple_srcs or src_url_names_container or
+ (exp_dst_url.IsFileUrl() and exp_dst_url.IsDirectory()))
+ and not exp_dst_url.url_string.endswith(exp_dst_url.delim)):
+ exp_dst_url = StorageUrlFromString('%s%s' % (exp_dst_url.url_string,
+ exp_dst_url.delim))
+
+ # Making naming behavior match how things work with local Linux cp and mv
+ # operations depends on many factors, including whether the destination
is a
+ # container, the plurality of the source(s), and whether the mv command
is
+ # being used:
+ # 1. For the "mv" command that specifies a non-existent destination
subdir,
+ # renaming should occur at the level of the src subdir, vs appending
that
+ # subdir beneath the dst subdir like is done for copying. For example:
+ # gsutil rm -R gs://bucket
+ # gsutil cp -R dir1 gs://bucket
+ # gsutil cp -R dir2 gs://bucket/subdir1
+ # gsutil mv gs://bucket/subdir1 gs://bucket/subdir2
+ # would (if using cp naming behavior) end up with paths like:
+ # gs://bucket/subdir2/subdir1/dir2/.svn/all-wcprops
+ # whereas mv naming behavior should result in:
+ # gs://bucket/subdir2/dir2/.svn/all-wcprops
+  #  2. Copying from directories, buckets, or bucket subdirs should result in
+ # objects/files mirroring the source directory hierarchy. For example:
+ # gsutil cp dir1/dir2 gs://bucket
+ # should create the object gs://bucket/dir2/file2, assuming dir1/dir2
+ # contains file2).
+  #     To be consistent with Linux cp behavior, there's one more wrinkle when
+  #     working with subdirs: The resulting object names depend on whether the
+ # destination subdirectory exists. For example, if gs://bucket/subdir
+ # exists, the command:
+ # gsutil cp -R dir1/dir2 gs://bucket/subdir
+ # should create objects named like gs://bucket/subdir/dir2/a/b/c. In
+ # contrast, if gs://bucket/subdir does not exist, this same command
+ # should create objects named like gs://bucket/subdir/a/b/c.
+  #  3. Copying individual files or objects to dirs, buckets or bucket subdirs
+ # should result in objects/files named by the final source file name
+ # component. Example:
+ # gsutil cp dir1/*.txt gs://bucket
+ # should create the objects gs://bucket/f1.txt and gs://bucket/f2.txt,
+ # assuming dir1 contains f1.txt and f2.txt.
+
+ recursive_move_to_new_subdir = False
+ if (global_copy_helper_opts.perform_mv and recursion_requested
+ and src_url_names_container and not have_existing_dest_subdir):
+ # Case 1. Handle naming rules for bucket subdir mv. Here we want to
+ # line up the src_url against its expansion, to find the base to build
+ # the new name. For example, running the command:
+ # gsutil mv gs://bucket/abcd gs://bucket/xyz
+ # when processing exp_src_url=gs://bucket/abcd/123
+ # exp_src_url_tail should become /123
+ # Note: mv.py code disallows wildcard specification of source URL.
+ recursive_move_to_new_subdir = True
+ exp_src_url_tail = (
+ exp_src_url.url_string[len(src_url.url_string):])
+ dst_key_name = '%s/%s' % (exp_dst_url.object_name.rstrip('/'),
+ exp_src_url_tail.strip('/'))
+
+ elif src_url_names_container and (exp_dst_url.IsCloudUrl() or
+ exp_dst_url.IsDirectory()):
+ # Case 2. Container copy to a destination other than a file.
+ # Build dst_key_name from subpath of exp_src_url past
+ # where src_url ends. For example, for src_url=gs://bucket/ and
+ # exp_src_url=gs://bucket/src_subdir/obj, dst_key_name should be
+ # src_subdir/obj.
+ src_url_path_sans_final_dir = GetPathBeforeFinalDir(src_url)
+ dst_key_name = exp_src_url.versionless_url_string[
+ len(src_url_path_sans_final_dir):].lstrip(src_url.delim)
+ # Handle case where dst_url is a non-existent subdir.
+ if not have_existing_dest_subdir:
+ dst_key_name = dst_key_name.partition(src_url.delim)[-1]
+ # Handle special case where src_url was a directory named with '.' or
+ # './', so that running a command like:
+ # gsutil cp -r . gs://dest
+ # will produce obj names of the form gs://dest/abc instead of
+ # gs://dest/./abc.
+ if dst_key_name.startswith('.%s' % os.sep):
+ dst_key_name = dst_key_name[2:]
+
+ else:
+ # Case 3.
+ dst_key_name = exp_src_url.object_name.rpartition(src_url.delim)[-1]
+
+ if (not recursive_move_to_new_subdir and (
+ exp_dst_url.IsFileUrl() or _ShouldTreatDstUrlAsBucketSubDir(
+ have_multiple_srcs, exp_dst_url, have_existing_dest_subdir,
+ src_url_names_container, recursion_requested))):
+ if exp_dst_url.object_name and exp_dst_url.object_name.endswith(
+ exp_dst_url.delim):
+ dst_key_name = '%s%s%s' % (
+ exp_dst_url.object_name.rstrip(exp_dst_url.delim),
+ exp_dst_url.delim, dst_key_name)
+ else:
+ delim = exp_dst_url.delim if exp_dst_url.object_name else ''
+ dst_key_name = '%s%s%s' % (exp_dst_url.object_name or '',
+ delim, dst_key_name)
+
+ new_exp_dst_url = exp_dst_url.Clone()
+ new_exp_dst_url.object_name = dst_key_name.replace(src_url.delim,
+ exp_dst_url.delim)
+ return new_exp_dst_url
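
A minimal illustration of case 3 above (a single object copied into an existing
bucket "sub-directory"); the names are made up and only the final-component
arithmetic from the code above is shown:

    src_object_name = 'dir1/f1.txt'
    dst_object_name = 'backup/'    # existing destination bucket subdir
    dst_key_name = src_object_name.rpartition('/')[-1]
    dst_key_name = '%s/%s' % (dst_object_name.rstrip('/'), dst_key_name)
    assert dst_key_name == 'backup/f1.txt'
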
+
+
+def _CreateDigestsFromDigesters(digesters):
+ digests = {}
+ if digesters:
+ for alg in digesters:
+ digests[alg] = base64.encodestring(
+ digesters[alg].digest()).rstrip('\n')
+ return digests
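
For reference, a small standalone example of the digest encoding used above;
the expected value is the well-known base64 MD5 of empty content:

    import base64
    from hashlib import md5

    digest = base64.encodestring(md5('').digest()).rstrip('\n')
    assert digest == '1B2M2Y8AsgTpgAmY7PhCfg=='
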
+
+
+def _CreateDigestsFromLocalFile(logger, algs, file_name, src_obj_metadata):
+ """Creates a base64 CRC32C and/or MD5 digest from file_name.
+
+ Args:
+ logger: for outputting log messages.
+ algs: list of algorithms to compute.
+ file_name: file to digest.
+    src_obj_metadata: metadata of source object.
+
+ Returns:
+ Dict of algorithm name : base 64 encoded digest
+ """
+ hash_dict = {}
+ if 'md5' in algs:
+ if src_obj_metadata.size and src_obj_metadata.size > TEN_MB:
+ logger.info(
+ 'Computing MD5 for %s...', file_name)
+ hash_dict['md5'] = md5()
+ if 'crc32c' in algs:
+ hash_dict['crc32c'] = crcmod.predefined.Crc('crc-32c')
+ with open(file_name, 'rb') as fp:
+ CalculateHashesFromContents(
+ fp, hash_dict, ProgressCallbackWithBackoff(
+ src_obj_metadata.size,
+ FileProgressCallbackHandler(
+ ConstructAnnounceText('Hashing', file_name), logger).call))
+ digests = {}
+ for alg_name, digest in hash_dict.iteritems():
+ digests[alg_name] = Base64EncodeHash(digest.hexdigest())
+ return digests
+
+
+def _CheckCloudHashes(logger, src_url, dst_url, src_obj_metadata,
+ dst_obj_metadata):
+ """Validates integrity of two cloud objects copied via daisy-chain.
+
+ Args:
+ logger: for outputting log messages.
+ src_url: CloudUrl for source cloud object.
+ dst_url: CloudUrl for destination cloud object.
+    src_obj_metadata: Cloud Object metadata for object being downloaded from.
+ dst_obj_metadata: Cloud Object metadata for object being uploaded to.
+
+ Raises:
+ CommandException: if cloud digests don't match local digests.
+ """
+ checked_one = False
+ download_hashes = {}
+ upload_hashes = {}
+ if src_obj_metadata.md5Hash:
+ download_hashes['md5'] = src_obj_metadata.md5Hash
+ if src_obj_metadata.crc32c:
+ download_hashes['crc32c'] = src_obj_metadata.crc32c
+ if dst_obj_metadata.md5Hash:
+ upload_hashes['md5'] = dst_obj_metadata.md5Hash
+ if dst_obj_metadata.crc32c:
+ upload_hashes['crc32c'] = dst_obj_metadata.crc32c
+
+ for alg, upload_b64_digest in upload_hashes.iteritems():
+ if alg not in download_hashes:
+ continue
+
+ download_b64_digest = download_hashes[alg]
+ logger.debug(
+ 'Comparing source vs destination %s-checksum for %s. (%s/%s)', alg,
+ dst_url, download_b64_digest, upload_b64_digest)
+ if download_b64_digest != upload_b64_digest:
+ raise CommandException(
+ '%s signature for source object (%s) doesn\'t match '
+          'destination object digest (%s). Object (%s) will be deleted.' % (
+ alg, download_b64_digest, upload_b64_digest, dst_url))
+ checked_one = True
+ if not checked_one:
+    # One known way this can currently happen is when downloading objects larger
+ # than 5GB from S3 (for which the etag is not an MD5).
+ logger.warn(
+        'WARNING: Found no hashes to validate object downloaded from %s and '
+ 'uploaded to %s. Integrity cannot be assured without hashes.',
+ src_url, dst_url)
+
+
+def _CheckHashes(logger, obj_url, obj_metadata, file_name, digests,
+ is_upload=False):
+ """Validates integrity by comparing cloud digest to local digest.
+
+ Args:
+ logger: for outputting log messages.
+ obj_url: CloudUrl for cloud object.
+ obj_metadata: Cloud Object being downloaded from or uploaded to.
+    file_name: Local file name on disk being downloaded to or uploaded from.
+ digests: Computed Digests for the object.
+    is_upload: If true, comparing for an uploaded object (controls logging).
+
+ Raises:
+ CommandException: if cloud digests don't match local digests.
+ """
+ local_hashes = digests
+ cloud_hashes = {}
+ if obj_metadata.md5Hash:
+ cloud_hashes['md5'] = obj_metadata.md5Hash.rstrip('\n')
+ if obj_metadata.crc32c:
+ cloud_hashes['crc32c'] = obj_metadata.crc32c.rstrip('\n')
+
+ checked_one = False
+ for alg in local_hashes:
+ if alg not in cloud_hashes:
+ continue
+
+ local_b64_digest = local_hashes[alg]
+ cloud_b64_digest = cloud_hashes[alg]
+ logger.debug(
+        'Comparing local vs cloud %s-checksum for %s. (%s/%s)', alg, file_name,
+ local_b64_digest, cloud_b64_digest)
+ if local_b64_digest != cloud_b64_digest:
+
+ raise CommandException(
+ '%s signature computed for local file (%s) doesn\'t match '
+ 'cloud-supplied digest (%s). %s (%s) will be deleted.' % (
+ alg, local_b64_digest, cloud_b64_digest,
+ 'Cloud object' if is_upload else 'Local file',
+ obj_url if is_upload else file_name))
+ checked_one = True
+ if not checked_one:
+ if is_upload:
+ logger.warn(
+ 'WARNING: Found no hashes to validate object uploaded to %s. '
+ 'Integrity cannot be assured without hashes.', obj_url)
+ else:
+      # One known way this can currently happen is when downloading objects larger
+ # than 5GB from S3 (for which the etag is not an MD5).
+ logger.warn(
+ 'WARNING: Found no hashes to validate object downloaded to %s. '
+ 'Integrity cannot be assured without hashes.', file_name)
+
+
+def IsNoClobberServerException(e):
+ """Checks to see if the server attempted to clobber a file.
+
+ In this case we specified via a precondition that we didn't want the file
+ clobbered.
+
+ Args:
+ e: The Exception that was generated by a failed copy operation
+
+ Returns:
+ bool indicator - True indicates that the server did attempt to clobber
+ an existing file.
+ """
+ return ((isinstance(e, PreconditionException)) or
+ (isinstance(e, ResumableUploadException) and '412' in e.message))
+
+
+def CheckForDirFileConflict(exp_src_url, dst_url):
+ """Checks whether copying exp_src_url into dst_url is not possible.
+
+  This happens if a directory exists in the local file system where a file
+  needs to go or vice versa. In that case we print an error message and
+  exit. Example: if the file "./x" exists and you try to do:
+ gsutil cp gs://mybucket/x/y .
+ the request can't succeed because it requires a directory where
+ the file x exists.
+
+ Note that we don't enforce any corresponding restrictions for buckets,
+ because the flat namespace semantics for buckets doesn't prohibit such
+ cases the way hierarchical file systems do. For example, if a bucket
+  contains an object called gs://bucket/dir and then you run the command:
+ gsutil cp file1 file2 gs://bucket/dir
+ you'll end up with objects gs://bucket/dir, gs://bucket/dir/file1, and
+ gs://bucket/dir/file2.
+
+ Args:
+ exp_src_url: Expanded source StorageUrl.
+ dst_url: Destination StorageUrl.
+
+ Raises:
+ CommandException: if errors encountered.
+ """
+ if dst_url.IsCloudUrl():
+ # The problem can only happen for file destination URLs.
+ return
+ dst_path = dst_url.object_name
+ final_dir = os.path.dirname(dst_path)
+ if os.path.isfile(final_dir):
+ raise CommandException('Cannot retrieve %s because a file exists '
+ 'where a directory needs to be created (%s).' %
+ (exp_src_url.url_string, final_dir))
+ if os.path.isdir(dst_path):
+ raise CommandException('Cannot retrieve %s because a directory exists '
+ '(%s) where the file needs to be created.' %
+ (exp_src_url.url_string, dst_path))
+
+
+def _PartitionFile(fp, file_size, src_url, content_type, canned_acl,
+ dst_bucket_url, random_prefix, tracker_file,
+ tracker_file_lock):
+ """Partitions a file into FilePart objects to be uploaded and later
composed.
+
+ These objects, when composed, will match the original file. This entails
+  splitting the file into parts, naming and forming a destination URL for each
+ part, and also providing the PerformParallelUploadFileToObjectArgs
+ corresponding to each part.
+
+ Args:
+ fp: The file object to be partitioned.
+ file_size: The size of fp, in bytes.
+ src_url: Source FileUrl from the original command.
+ content_type: content type for the component and final objects.
+ canned_acl: The user-provided canned_acl, if applicable.
+ dst_bucket_url: CloudUrl for the destination bucket
+ random_prefix: The randomly-generated prefix used to prevent collisions
+ among the temporary component names.
+ tracker_file: The path to the parallel composite upload tracker file.
+ tracker_file_lock: The lock protecting access to the tracker file.
+
+ Returns:
+ dst_args: The destination URIs for the temporary component objects.
+ """
+ parallel_composite_upload_component_size = HumanReadableToBytes(
+ config.get('GSUtil', 'parallel_composite_upload_component_size',
+ DEFAULT_PARALLEL_COMPOSITE_UPLOAD_COMPONENT_SIZE))
+ (num_components, component_size) = _GetPartitionInfo(
+      file_size, MAX_COMPOSE_ARITY, parallel_composite_upload_component_size)
+
+ dst_args = {} # Arguments to create commands and pass to subprocesses.
+ file_names = [] # Used for the 2-step process of forming dst_args.
+ for i in range(num_components):
+ # "Salt" the object name with something a user is very unlikely to have
+ # used in an object name, then hash the extended name to make sure
+ # we don't run into problems with name length. Using a deterministic
+ # naming scheme for the temporary components allows users to take
+ # advantage of resumable uploads for each component.
+ encoded_name = (PARALLEL_UPLOAD_STATIC_SALT + fp.name).encode(UTF8)
+ content_md5 = md5()
+ content_md5.update(encoded_name)
+ digest = content_md5.hexdigest()
+ temp_file_name = (random_prefix + PARALLEL_UPLOAD_TEMP_NAMESPACE +
+ digest + '_' + str(i))
+ tmp_dst_url = dst_bucket_url.Clone()
+ tmp_dst_url.object_name = temp_file_name
+
+ if i < (num_components - 1):
+ # Every component except possibly the last is the same size.
+ file_part_length = component_size
+ else:
+ # The last component just gets all of the remaining bytes.
+      file_part_length = (file_size - ((num_components - 1) * component_size))
+ offset = i * component_size
+ func_args = PerformParallelUploadFileToObjectArgs(
+        fp.name, offset, file_part_length, src_url, tmp_dst_url, canned_acl,
+ content_type, tracker_file, tracker_file_lock)
+ file_names.append(temp_file_name)
+ dst_args[temp_file_name] = func_args
+
+ return dst_args
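
A minimal illustration of the partitioning arithmetic above with made-up sizes:
every component is component_size bytes except the last, which gets the
remaining bytes:

    file_size, num_components, component_size = 100, 3, 40
    last_part = file_size - ((num_components - 1) * component_size)
    assert last_part == 20
    assert (num_components - 1) * component_size + last_part == file_size
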
+
+
+def _DoParallelCompositeUpload(fp, src_url, dst_url, dst_obj_metadata,
+                               canned_acl, file_size, preconditions, gsutil_api,
+                               command_obj, copy_exception_handler):
+  """Uploads a local file to a cloud object using parallel composite upload.
+
+ The file is partitioned into parts, and then the parts are uploaded in
+ parallel, composed to form the original destination object, and deleted.
+
+ Args:
+ fp: The file object to be uploaded.
+ src_url: FileUrl representing the local file.
+ dst_url: CloudUrl representing the destination file.
+ dst_obj_metadata: apitools Object describing the destination object.
+ canned_acl: The canned acl to apply to the object, if any.
+ file_size: The size of the source file in bytes.
+ preconditions: Cloud API Preconditions for the final object.
+ gsutil_api: gsutil Cloud API instance to use.
+ command_obj: Command object (for calling Apply).
+ copy_exception_handler: Copy exception handler (for use in Apply).
+
+ Returns:
+ Elapsed upload time, uploaded Object with generation, crc32c, and size
+ fields populated.
+ """
***The diff for this file has been truncated for email.***
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/cs_api_map.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+# Copyright 2013 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""API map classes used with the CloudApiDelegator class."""
+
+from __future__ import absolute_import
+
+from gslib.boto_translation import BotoTranslation
+from gslib.gcs_json_api import GcsJsonApi
+
+
+class ApiSelector(object):
+ """Enum class for API."""
+ XML = 'XML'
+ JSON = 'JSON'
+
+
+class ApiMapConstants(object):
+ """Enum class for API map entries."""
+ API_MAP = 'apiclass'
+ SUPPORT_MAP = 'supported'
+ DEFAULT_MAP = 'default'
+
+
+class GsutilApiClassMapFactory(object):
+ """Factory for generating gsutil API class maps.
+
+ A valid class map is defined as:
+ {
+ (key) Provider prefix used in URI strings.
+ (value) {
+ (key) ApiSelector describing the API format.
+ (value) CloudApi child class that implements this API.
+ }
+ }
+ """
+
+ @classmethod
+ def GetClassMap(cls):
+ """Returns the default gsutil class map."""
+ gs_class_map = {
+ ApiSelector.XML: BotoTranslation,
+ ApiSelector.JSON: GcsJsonApi
+ }
+ s3_class_map = {
+ ApiSelector.XML: BotoTranslation
+ }
+ class_map = {
+ 'gs': gs_class_map,
+ 's3': s3_class_map
+ }
+ return class_map
+
+
+class GsutilApiMapFactory(object):
+ """Factory the generates the default gsutil API map.
+
+  The API map determines which Cloud API implementation is used for a given
+ command. A valid API map is defined as:
+ {
+ (key) ApiMapConstants.API_MAP : (value) Gsutil API class map (as
+ described in GsutilApiClassMapFactory comments).
+ (key) ApiMapConstants.SUPPORT_MAP : (value) {
+ (key) Provider prefix used in URI strings.
+      (value) list of ApiSelectors supported by the command for this provider.
+ }
+ (key) ApiMapConstants.DEFAULT_MAP : (value) {
+ (key) Provider prefix used in URI strings.
+ (value) Default ApiSelector for this command and provider.
+ }
+ }
+ """
+
+ @classmethod
+  def GetApiMap(cls, gsutil_api_class_map_factory, support_map, default_map):
+ """Creates a GsutilApiMap for use by the command from the inputs.
+
+ Args:
+      gsutil_api_class_map_factory: Factory defining a GetClassMap() function
+ adhering to GsutilApiClassMapFactory
+ semantics.
+      support_map: Entries for ApiMapConstants.SUPPORT_MAP as described above.
+      default_map: Entries for ApiMapConstants.DEFAULT_MAP as described above.
+
+ Returns:
+ GsutilApiMap generated from the inputs.
+ """
+ return {
+        ApiMapConstants.API_MAP: gsutil_api_class_map_factory.GetClassMap(),
+ ApiMapConstants.SUPPORT_MAP: support_map,
+ ApiMapConstants.DEFAULT_MAP: default_map
+ }
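
As a minimal sketch of how a command might assemble its API map from these
factories (the support and default entries below are illustrative rather than
copied from any particular command):

    support_map = {'gs': [ApiSelector.XML, ApiSelector.JSON],
                   's3': [ApiSelector.XML]}
    default_map = {'gs': ApiSelector.JSON, 's3': ApiSelector.XML}
    api_map = GsutilApiMapFactory.GetApiMap(GsutilApiClassMapFactory,
                                            support_map, default_map)
    assert api_map[ApiMapConstants.DEFAULT_MAP]['gs'] == ApiSelector.JSON
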
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/daisy_chain_wrapper.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,243 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Wrapper for use in daisy-chained copies."""
+
+from collections import deque
+import os
+import threading
+import time
+
+from gslib.cloud_api import BadRequestException
+from gslib.cloud_api import CloudApi
+from gslib.util import CreateLock
+from gslib.util import TRANSFER_BUFFER_SIZE
+
+
+# This controls the amount of bytes downloaded per download request.
+# We do not buffer this many bytes in memory at a time - that is controlled by
+# DaisyChainWrapper.max_buffer_size. This is the upper bound of bytes that may
+# be unnecessarily downloaded if there is a break in the resumable upload.
+DAISY_CHAIN_CHUNK_SIZE = 1024*1024*100
+
+
+class BufferWrapper(object):
+ """Wraps the download file pointer to use our in-memory buffer."""
+
+ def __init__(self, daisy_chain_wrapper):
+ """Provides a buffered write interface for a file download.
+
+ Args:
+ daisy_chain_wrapper: DaisyChainWrapper instance to use for buffer and
+ locking.
+ """
+ self.daisy_chain_wrapper = daisy_chain_wrapper
+
+ def write(self, data): # pylint: disable=invalid-name
+ """Waits for space in the buffer, then writes data to the buffer."""
+ while True:
+ with self.daisy_chain_wrapper.lock:
+ if (self.daisy_chain_wrapper.bytes_buffered <
+ self.daisy_chain_wrapper.max_buffer_size):
+ break
+      # Buffer was full, yield thread priority so the upload can pull from it.
+ time.sleep(0)
+ data_len = len(data)
+ with self.daisy_chain_wrapper.lock:
+ self.daisy_chain_wrapper.buffer.append(data)
+ self.daisy_chain_wrapper.bytes_buffered += data_len
+
+
+class DaisyChainWrapper(object):
+ """Wrapper class for daisy-chaining a cloud download to an upload.
+
+  This class instantiates a BufferWrapper object to buffer the download into
+  memory, consuming a maximum of max_buffer_size. It implements intelligent
+  behavior around read and seek that allows for all of the operations necessary
+  to copy a file.
+
+  This class is coupled with the XML and JSON implementations in that it
+  expects that small buffers (at most TRANSFER_BUFFER_SIZE in size) will be
+  used.
+ """
+
+ def __init__(self, src_url, src_obj_size, gsutil_api):
+ """Initializes the daisy chain wrapper.
+
+ Args:
+ src_url: Source CloudUrl to copy from.
+ src_obj_size: Size of source object.
+ gsutil_api: gsutil Cloud API to use for the copy.
+ """
+ # Current read position for the upload file pointer.
+ self.position = 0
+ self.buffer = deque()
+
+ self.bytes_buffered = 0
+ self.max_buffer_size = 1024 * 1024 # 1 MB
+
+ # We save one buffer's worth of data as a special case for boto,
+ # which seeks back one buffer and rereads to compute hashes. This is
+    # unnecessary because we can just compare cloud hash digests at the end,
+    # but it allows this to work without modifying boto.
+ self.last_position = 0
+ self.last_data = None
+
+    # Protects buffer, position, bytes_buffered, last_position, and last_data.
+ self.lock = CreateLock()
+
+ self.src_obj_size = src_obj_size
+ self.src_url = src_url
+
+    # It is safe to use this in both the upload and download threads because
+    # the download thread calls only GetObjectMedia, which creates a new HTTP
+    # connection independent of gsutil_api. Thus, it will not share an HTTP
+    # connection with the upload.
+ self.gsutil_api = gsutil_api
+
+ self.download_thread = None
+ self.stop_download = threading.Event()
+ self.StartDownloadThread()
+
+ def StartDownloadThread(self, start_byte=0):
+ """Starts the download thread for the source object (from
start_byte)."""
+
+ def PerformDownload(start_byte):
+ """Downloads the source object in chunks.
+
+      This function checks the stop_download event and exits early if it is
+      set. It should be set when there is an error during the daisy-chain
+      upload, then this function can be called again with the upload's current
+      position as start_byte.
+
+ Args:
+ start_byte: Byte from which to begin the download.
+ """
+      # TODO: Support resumable downloads. This would require the BufferWrapper
+      # object to support seek() and tell() which requires coordination with
+ # the upload.
+ while start_byte + DAISY_CHAIN_CHUNK_SIZE < self.src_obj_size:
+ self.gsutil_api.GetObjectMedia(
+ self.src_url.bucket_name, self.src_url.object_name,
+ BufferWrapper(self), start_byte=start_byte,
+ end_byte=start_byte + DAISY_CHAIN_CHUNK_SIZE - 1,
+            generation=self.src_url.generation, object_size=self.src_obj_size,
+ download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
+ provider=self.src_url.scheme)
+ if self.stop_download.is_set():
+ # Download thread needs to be restarted, so exit.
+ self.stop_download.clear()
+ return
+ start_byte += DAISY_CHAIN_CHUNK_SIZE
+ self.gsutil_api.GetObjectMedia(
+ self.src_url.bucket_name, self.src_url.object_name,
+ BufferWrapper(self), start_byte=start_byte,
+          generation=self.src_url.generation, object_size=self.src_obj_size,
+ download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
+ provider=self.src_url.scheme)
+
+ # TODO: If we do gzip encoding transforms mid-transfer, this will fail.
+ self.download_thread = threading.Thread(target=PerformDownload,
+ args=(start_byte,))
+ self.download_thread.start()
+
+ def read(self, amt=None): # pylint: disable=invalid-name
+ """Exposes a stream from the in-memory buffer to the upload."""
+ if self.position == self.src_obj_size or amt == 0:
+ # If there is no data left or 0 bytes were requested, return an empty
+ # string so callers can still call len() and read(0).
+ return ''
+ if amt is None or amt > TRANSFER_BUFFER_SIZE:
+ raise BadRequestException(
+ 'Invalid HTTP read size %s during daisy chain operation, '
+ 'expected <= %s.' % (amt, TRANSFER_BUFFER_SIZE))
+ while True:
+ with self.lock:
+ if self.buffer:
+ break
+ # Buffer was empty, yield thread priority so the download thread can fill it.
+ time.sleep(0)
+ with self.lock:
+ data = self.buffer.popleft()
+ self.last_position = self.position
+ self.last_data = data
+ data_len = len(data)
+ self.position += data_len
+ self.bytes_buffered -= data_len
+ if data_len > amt:
+ raise BadRequestException(
+ 'Invalid read during daisy chain operation, got data of size '
+ '%s, expected size %s.' % (data_len, amt))
+ return data
+
+ def tell(self): # pylint: disable=invalid-name
+ with self.lock:
+ return self.position
+
+ def seek(self, offset, whence=os.SEEK_SET): # pylint: disable=invalid-name
+ restart_download = False
+ if whence == os.SEEK_END:
+ if offset:
+ raise BadRequestException(
+ 'Invalid seek during daisy chain operation. Non-zero offset %s '
+ 'from os.SEEK_END is not supported' % offset)
+ with self.lock:
+ self.last_position = self.position
+ self.last_data = None
+ # Safe because we check position against src_obj_size in read.
+ self.position = self.src_obj_size
+ elif whence == os.SEEK_SET:
+ with self.lock:
+ if offset == self.position:
+ pass
+ elif offset == self.last_position:
+ self.position = self.last_position
+ if self.last_data:
+ # If we seek to end and then back, we won't have last_data; we'll
+ # get it on the next call to read.
+ self.buffer.appendleft(self.last_data)
+ self.bytes_buffered += len(self.last_data)
+ else:
+ # Once a download is complete, boto seeks to 0 and re-reads to
+ # compute the hash if an md5 isn't already present (for example a GCS
+ # composite object), so we have to re-download the whole object.
+ # Also, when daisy-chaining to a resumable upload, on error the
+ # service may have received any number of the bytes; the download
+ # needs to be restarted from that point.
+ restart_download = True
+
+ if restart_download:
+ self.stop_download.set()
+
+ # Consume any remaining bytes in the download thread so that
+ # the thread can exit, then restart the thread at the desired position.
+ while self.download_thread.is_alive():
+ with self.lock:
+ while self.bytes_buffered:
+ self.bytes_buffered -= len(self.buffer.popleft())
+ time.sleep(0)
+
+ with self.lock:
+ self.position = offset
+ self.buffer = deque()
+ self.bytes_buffered = 0
+ self.last_position = 0
+ self.last_data = None
+ self.StartDownloadThread(start_byte=offset)
+ else:
+ raise IOError('Daisy-chain download wrapper does not support '
+ 'seek mode %s' % whence)
+
+ def seekable(self): # pylint: disable=invalid-name
+ return True
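
[Editorial example, not part of this change: the read()/write() handoff above is a bounded producer/consumer buffer. The sketch below shows the same pattern in isolation; all names (writer, reader, MAX_BUFFERED, etc.) are invented for illustration, and the yielding with time.sleep(0) mirrors how the upload and download threads cooperate.]

import threading
import time
from collections import deque

MAX_BUFFERED = 4

buf = deque()
lock = threading.Lock()

def writer():
    for i in range(16):
        while True:
            with lock:
                if len(buf) < MAX_BUFFERED:
                    buf.append('chunk-%d' % i)
                    break
            time.sleep(0)  # Buffer full: yield so the reader can drain it.

def reader(out):
    for _ in range(16):
        while True:
            with lock:
                if buf:
                    out.append(buf.popleft())
                    break
            time.sleep(0)  # Buffer empty: yield so the writer can fill it.

received = []
t = threading.Thread(target=writer)
t.start()
reader(received)
t.join()
assert received == ['chunk-%d' % i for i in range(16)]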
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/gcs_json_api.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,1136 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""JSON gsutil Cloud API implementation for Google Cloud Storage."""
+
+from __future__ import absolute_import
+
+import httplib
+import json
+import socket
+import ssl
+import time
+
+import boto
+from boto import config
+from gcs_oauth2_boto_plugin import oauth2_helper
+import httplib2
+from oauth2client import multistore_file
+
+from gslib.cloud_api import AccessDeniedException
+from gslib.cloud_api import ArgumentException
+from gslib.cloud_api import BadRequestException
+from gslib.cloud_api import CloudApi
+from gslib.cloud_api import NotEmptyException
+from gslib.cloud_api import NotFoundException
+from gslib.cloud_api import PreconditionException
+from gslib.cloud_api import Preconditions
+from gslib.cloud_api import ResumableDownloadException
+from gslib.cloud_api import ResumableUploadAbortException
+from gslib.cloud_api import ResumableUploadException
+from gslib.cloud_api import ServiceException
+from gslib.cloud_api_helper import ValidateDstObjectMetadata
+from gslib.cred_types import CredTypes
+from gslib.exception import CommandException
+from gslib.gcs_json_media import BytesTransferredContainer
+from gslib.gcs_json_media import DownloadCallbackConnectionClassFactory
+from gslib.gcs_json_media import HttpWithDownloadStream
+from gslib.gcs_json_media import UploadCallbackConnectionClassFactory
+from gslib.gcs_json_media import WrapDownloadHttpRequest
+from gslib.gcs_json_media import WrapUploadHttpRequest
+from gslib.no_op_credentials import NoOpCredentials
+from gslib.project_id import PopulateProjectId
+from gslib.third_party.storage_apitools import credentials_lib as credentials_lib
+from gslib.third_party.storage_apitools import encoding as encoding
+from gslib.third_party.storage_apitools import exceptions as apitools_exceptions
+from gslib.third_party.storage_apitools import http_wrapper as apitools_http_wrapper
+from gslib.third_party.storage_apitools import storage_v1_client as apitools_client
+from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
+from gslib.third_party.storage_apitools import transfer as apitools_transfer
+from gslib.third_party.storage_apitools.util import CalculateWaitForRetry
+from gslib.translation_helper import CreateBucketNotFoundException
+from gslib.translation_helper import CreateObjectNotFoundException
+from gslib.translation_helper import DEFAULT_CONTENT_TYPE
+from gslib.translation_helper import REMOVE_CORS_CONFIG
+from gslib.util import GetCertsFile
+from gslib.util import GetCredentialStoreFilename
+from gslib.util import GetMaxRetryDelay
+from gslib.util import GetNewHttp
+from gslib.util import GetNumRetries
+
+
+# Implementation supports only 'gs' URLs, so provider is unused.
+# pylint: disable=unused-argument
+
+DEFAULT_GCS_JSON_VERSION = 'v1'
+
+NUM_BUCKETS_PER_LIST_PAGE = 100
+NUM_OBJECTS_PER_LIST_PAGE = 500
+
+
+# Resumable downloads and uploads make one HTTP call per chunk (and the chunk
+# size must be a multiple of 256KB). Overridable for testing.
+def _ResumableChunkSize():
+ chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size',
+ 1024*1024*100L)
+ if chunk_size == 0:
+ chunk_size = 1024*256L
+ elif chunk_size % (1024*256L) != 0:
+ chunk_size += (1024*256L - (chunk_size % (1024*256L)))
+ return chunk_size
+
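
[Editorial example, not part of the checked-in file: a quick worked illustration of the rounding above. Any configured value that is not already a multiple of 256 KB is rounded up to the next multiple; KB256 and the sample values are assumptions for the example.]

KB256 = 1024 * 256
for configured in (0, KB256, 5 * 1024 * 1024 + 1):
    size = configured or KB256
    if size % KB256:
        size += KB256 - (size % KB256)
    assert size % KB256 == 0 and size >= configured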
+TRANSLATABLE_APITOOLS_EXCEPTIONS = (apitools_exceptions.HttpError,
+ apitools_exceptions.TransferError,
+ apitools_exceptions.TransferInvalidError)
+
+# TODO: Distribute these exceptions better through apitools and here.
+# Right now, apitools is configured not to handle any exceptions on
+# uploads/downloads.
+# oauth2_client tries to JSON-decode the response, which can result
+# in a ValueError if the response was invalid. Until that is fixed in
+# oauth2_client, need to handle it here.
+HTTP_TRANSFER_EXCEPTIONS = (apitools_exceptions.TransferRetryError,
+ apitools_exceptions.BadStatusCodeError,
+ # TODO: Honor retry-after headers.
+ apitools_exceptions.RetryAfterError,
+ apitools_exceptions.RequestError,
+ httplib.BadStatusLine,
+ httplib.IncompleteRead,
+ httplib.ResponseNotReady,
+ httplib2.ServerNotFoundError,
+ socket.error,
+ socket.gaierror,
+ socket.timeout,
+ ssl.SSLError,
+ ValueError)
+
+
+_VALIDATE_CERTIFICATES_503_MESSAGE = (
+ """Service Unavailable. If you have recently changed
+ https_validate_certificates from True to False in your boto configuration
+ file, please delete any cached access tokens in your filesystem (at %s)
+ and try again.""" % GetCredentialStoreFilename())
+
+
+class GcsJsonApi(CloudApi):
+ """Google Cloud Storage JSON implementation of gsutil Cloud API."""
+
+ def __init__(self, bucket_storage_uri_class, logger, provider=None,
+ credentials=None, debug=0):
+ """Performs necessary setup for interacting with Google Cloud Storage.
+
+ Args:
+ bucket_storage_uri_class: Unused.
+ logger: logging.logger for outputting log messages.
+ provider: Unused. This implementation supports only Google Cloud Storage.
+ credentials: Credentials to be used for interacting with Google Cloud
+ Storage.
+ debug: Debug level for the API implementation (0..3).
+ """
+ # TODO: Plumb host_header for perfdiag / test_perfdiag.
+ # TODO: Add jitter to apitools' http_wrapper retry mechanism.
+ super(GcsJsonApi, self).__init__(bucket_storage_uri_class, logger,
+ provider='gs', debug=debug)
+ no_op_credentials = False
+ if not credentials:
+ loaded_credentials = self._CheckAndGetCredentials(logger)
+
+ if not loaded_credentials:
+ loaded_credentials = NoOpCredentials()
+ no_op_credentials = True
+ else:
+ if isinstance(credentials, NoOpCredentials):
+ no_op_credentials = True
+
+ self.credentials = credentials or loaded_credentials
+
+ self.certs_file = GetCertsFile()
+
+ self.http = GetNewHttp()
+ self.http_base = 'https://'
+ gs_json_host = config.get('Credentials', 'gs_json_host', None)
+ self.host_base = gs_json_host or 'www.googleapis.com'
+
+ if not gs_json_host:
+ gs_host = config.get('Credentials', 'gs_host', None)
+ if gs_host:
+ raise ArgumentException(
+ 'JSON API is selected but gs_json_host is not configured, '
+ 'while gs_host is configured to %s. Please also configure '
+ 'gs_json_host and gs_json_port to match your desired endpoint.'
+ % gs_host)
+
+ gs_json_port = config.get('Credentials', 'gs_json_port', None)
+
+ if not gs_json_port:
+ gs_port = config.get('Credentials', 'gs_port', None)
+ if gs_port:
+ raise ArgumentException(
+ 'JSON API is selected but gs_json_port is not configured, '
+ 'while gs_port is configured to %s. Please also configure '
+ 'gs_json_host and gs_json_port to match your desired endpoint.'
+ % gs_port)
+ self.host_port = ''
+ else:
+ self.host_port = ':' + config.get('Credentials', 'gs_json_port')
+
+ self.api_version = config.get('GSUtil', 'json_api_version',
+ DEFAULT_GCS_JSON_VERSION)
+ self.url_base = (self.http_base + self.host_base + self.host_port + '/' +
+ 'storage/' + self.api_version + '/')
+
+ self.credentials.set_store(
+ multistore_file.get_credential_storage_custom_string_key(
+ GetCredentialStoreFilename(), self.api_version))
+
+ self.num_retries = GetNumRetries()
+
+ log_request = (debug >= 3)
+ log_response = (debug >= 3)
+
+ self.api_client = apitools_client.StorageV1(
+ url=self.url_base, http=self.http, log_request=log_request,
+ log_response=log_response, credentials=self.credentials,
+ version=self.api_version)
+
+ if no_op_credentials:
+ # This API key is not secret and is used to identify gsutil during
+ # anonymous requests.
+ self.api_client.AddGlobalParam('key',
+ u'AIzaSyDnacJHrKma0048b13sh8cgxNUwulubmJM')
+
+ def _CheckAndGetCredentials(self, logger):
+ configured_cred_types = []
+ try:
+ if self._HasOauth2UserAccountCreds():
+ configured_cred_types.append(CredTypes.OAUTH2_USER_ACCOUNT)
+ if self._HasOauth2ServiceAccountCreds():
+ configured_cred_types.append(CredTypes.OAUTH2_SERVICE_ACCOUNT)
+ if len(configured_cred_types) > 1:
+ # We only allow one set of configured credentials. Otherwise, we're
+ # choosing one arbitrarily, which can be very confusing to the user
+ # (e.g., if only one is authorized to perform some action) and can
+ # also mask errors.
+ # Because boto merges config files, GCE credentials show up by default
+ # for GCE VMs. We don't want to fail when a user creates a boto file
+ # with their own credentials, so in this case we'll use the OAuth2
+ # user credentials.
+ failed_cred_type = None
+ raise CommandException(
+ ('You have multiple types of configured credentials (%s), which is '
+ 'not supported. For more help, see "gsutil help creds".')
+ % configured_cred_types)
+
+ failed_cred_type = CredTypes.OAUTH2_USER_ACCOUNT
+ user_creds = self._GetOauth2UserAccountCreds()
+ failed_cred_type = CredTypes.OAUTH2_SERVICE_ACCOUNT
+ service_account_creds = self._GetOauth2ServiceAccountCreds()
+ failed_cred_type = CredTypes.GCE
+ gce_creds = self._GetGceCreds()
+ return user_creds or service_account_creds or gce_creds
+ except: # pylint: disable=bare-except
+
+ # If we didn't actually try to authenticate because there were multiple
+ # types of configured credentials, don't emit this warning.
+ if failed_cred_type:
+ logger.warn(
+ 'Your "%s" credentials are invalid. For more help, see '
+ '"gsutil help creds", or re-run the gsutil config command
(see '
+ '"gsutil help config").', failed_cred_type)
+
+ # If there's any set of configured credentials, we'll fail if they're
+ # invalid, rather than silently falling back to anonymous config (as
+ # boto does). That approach leads to much confusion if users don't
+ # realize their credentials are invalid.
+ raise
+
+ def _HasOauth2ServiceAccountCreds(self):
+ return (config.has_option('Credentials', 'gs_service_client_id') and
+ config.has_option('Credentials', 'gs_service_key_file'))
+
+ def _HasOauth2UserAccountCreds(self):
+ return config.has_option('Credentials', 'gs_oauth2_refresh_token')
+
+ def _HasGceCreds(self):
+ return config.has_option('GoogleCompute', 'service_account')
+
+ def _GetOauth2ServiceAccountCreds(self):
+ if self._HasOauth2ServiceAccountCreds():
+ return oauth2_helper.OAuth2ClientFromBotoConfig(
+ boto.config,
+ cred_type=CredTypes.OAUTH2_SERVICE_ACCOUNT).GetCredentials()
+
+ def _GetOauth2UserAccountCreds(self):
+ if self._HasOauth2UserAccountCreds():
+ return oauth2_helper.OAuth2ClientFromBotoConfig(
+ boto.config).GetCredentials()
+
+ def _GetGceCreds(self):
+ if self._HasGceCreds():
+ try:
+ return credentials_lib.GceAssertionCredentials()
+ except apitools_exceptions.ResourceUnavailableError, e:
+ if 'service account' in str(e) and 'does not exist' in str(e):
+ return None
+ raise
+
+ def _GetNewDownloadHttp(self, download_stream):
+ return GetNewHttp(http_class=HttpWithDownloadStream, stream=download_stream)
+
+ def GetBucket(self, bucket_name, provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ projection = (apitools_messages.StorageBucketsGetRequest
+ .ProjectionValueValuesEnum.full)
+ apitools_request = apitools_messages.StorageBucketsGetRequest(
+ bucket=bucket_name, projection=projection)
+ global_params = apitools_messages.StandardQueryParameters()
+ if fields:
+ global_params.fields = ','.join(set(fields))
+
+ # Here and in list buckets, we have no way of knowing
+ # whether we requested a field and didn't get it because it didn't exist
+ # or because we didn't have permission to access it.
+ try:
+ return self.api_client.buckets.Get(apitools_request,
+ global_params=global_params)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
+
+ def PatchBucket(self, bucket_name, metadata, preconditions=None,
+ provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ projection = (apitools_messages.StorageBucketsPatchRequest
+ .ProjectionValueValuesEnum.full)
+ bucket_metadata = metadata
+
+ if not preconditions:
+ preconditions = Preconditions()
+
+ # For blank metadata objects, we need to explicitly call
+ # them out to apitools so it will send/erase them.
+ apitools_include_fields = []
+ for metadata_field in ('metadata', 'lifecycle', 'logging', 'versioning',
+ 'website'):
+ attr = getattr(bucket_metadata, metadata_field, None)
+ if attr and not encoding.MessageToDict(attr):
+ setattr(bucket_metadata, metadata_field, None)
+ apitools_include_fields.append(metadata_field)
+
+ if bucket_metadata.cors and bucket_metadata.cors == REMOVE_CORS_CONFIG:
+ bucket_metadata.cors = []
+ apitools_include_fields.append('cors')
+
+ apitools_request = apitools_messages.StorageBucketsPatchRequest(
+ bucket=bucket_name, bucketResource=bucket_metadata,
+ projection=projection,
+ ifMetagenerationMatch=preconditions.meta_gen_match)
+ global_params = apitools_messages.StandardQueryParameters()
+ if fields:
+ global_params.fields = ','.join(set(fields))
+ with self.api_client.IncludeFields(apitools_include_fields):
+ try:
+ return self.api_client.buckets.Patch(apitools_request,
+ global_params=global_params)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e)
+
+ def CreateBucket(self, bucket_name, project_id=None, metadata=None,
+ provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ projection = (apitools_messages.StorageBucketsInsertRequest
+ .ProjectionValueValuesEnum.full)
+ if not metadata:
+ metadata = apitools_messages.Bucket()
+ metadata.name = bucket_name
+
+ if metadata.location:
+ metadata.location = metadata.location.upper()
+ if metadata.storageClass:
+ metadata.storageClass = metadata.storageClass.upper()
+
+ project_id = PopulateProjectId(project_id)
+
+ apitools_request = apitools_messages.StorageBucketsInsertRequest(
+ bucket=metadata, project=project_id, projection=projection)
+ global_params = apitools_messages.StandardQueryParameters()
+ if fields:
+ global_params.fields = ','.join(set(fields))
+ try:
+ return self.api_client.buckets.Insert(apitools_request,
+ global_params=global_params)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
+
+ def DeleteBucket(self, bucket_name, preconditions=None, provider=None):
+ """See CloudApi class for function doc strings."""
+ if not preconditions:
+ preconditions = Preconditions()
+
+ apitools_request = apitools_messages.StorageBucketsDeleteRequest(
+ bucket=bucket_name, ifMetagenerationMatch=preconditions.meta_gen_match)
+
+ try:
+ self.api_client.buckets.Delete(apitools_request)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ if isinstance(
+ self._TranslateApitoolsException(e, bucket_name=bucket_name),
+ NotEmptyException):
+ # If bucket is not empty, check to see if versioning is enabled and
+ # signal that in the exception if it is.
+ bucket_metadata = self.GetBucket(bucket_name,
+ fields=['versioning'])
+ if bucket_metadata.versioning and bucket_metadata.versioning.enabled:
+ raise NotEmptyException('VersionedBucketNotEmpty',
+ status=e.status_code)
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
+
+ def ListBuckets(self, project_id=None, provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ projection = (apitools_messages.StorageBucketsListRequest
+ .ProjectionValueValuesEnum.full)
+ project_id = PopulateProjectId(project_id)
+
+ apitools_request = apitools_messages.StorageBucketsListRequest(
+ project=project_id, maxResults=NUM_BUCKETS_PER_LIST_PAGE,
+ projection=projection)
+ global_params = apitools_messages.StandardQueryParameters()
+ if fields:
+ if 'nextPageToken' not in fields:
+ fields.add('nextPageToken')
+ global_params.fields = ','.join(set(fields))
+ try:
+ bucket_list = self.api_client.buckets.List(apitools_request,
+ global_params=global_params)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e)
+
+ for bucket in self._YieldBuckets(bucket_list):
+ yield bucket
+
+ while bucket_list.nextPageToken:
+ apitools_request = apitools_messages.StorageBucketsListRequest(
+ project=project_id, pageToken=bucket_list.nextPageToken,
+ maxResults=NUM_BUCKETS_PER_LIST_PAGE, projection=projection)
+ try:
+ bucket_list = self.api_client.buckets.List(apitools_request,
+ global_params=global_params)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e)
+
+ for bucket in self._YieldBuckets(bucket_list):
+ yield bucket
+
+ def _YieldBuckets(self, bucket_list):
+ """Yields buckets from a list returned by apitools."""
+ if bucket_list.items:
+ for bucket in bucket_list.items:
+ yield bucket
+
+ def ListObjects(self, bucket_name, prefix=None, delimiter=None,
+ all_versions=None, provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ projection = (apitools_messages.StorageObjectsListRequest
+ .ProjectionValueValuesEnum.full)
+ apitools_request = apitools_messages.StorageObjectsListRequest(
+ bucket=bucket_name, prefix=prefix, delimiter=delimiter,
+ versions=all_versions, projection=projection,
+ maxResults=NUM_OBJECTS_PER_LIST_PAGE)
+ global_params = apitools_messages.StandardQueryParameters()
+
+ if fields:
+ fields = set(fields)
+ if 'nextPageToken' not in fields:
+ fields.add('nextPageToken')
+ global_params.fields = ','.join(fields)
+
+ try:
+ object_list = self.api_client.objects.List(apitools_request,
+ global_params=global_params)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
+
+ for object_or_prefix in self._YieldObjectsAndPrefixes(object_list):
+ yield object_or_prefix
+
+ while object_list.nextPageToken:
+ apitools_request = apitools_messages.StorageObjectsListRequest(
+ bucket=bucket_name, prefix=prefix, delimiter=delimiter,
+ versions=all_versions, projection=projection,
+ pageToken=object_list.nextPageToken,
+ maxResults=NUM_OBJECTS_PER_LIST_PAGE)
+ try:
+ object_list = self.api_client.objects.List(apitools_request,
+ global_params=global_params)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
+
+ for object_or_prefix in self._YieldObjectsAndPrefixes(object_list):
+ yield object_or_prefix
+
+ def _YieldObjectsAndPrefixes(self, object_list):
+ if object_list.items:
+ for cloud_obj in object_list.items:
+ yield CloudApi.CsObjectOrPrefix(cloud_obj,
+ CloudApi.CsObjectOrPrefixType.OBJECT)
+ if object_list.prefixes:
+ for prefix in object_list.prefixes:
+ yield CloudApi.CsObjectOrPrefix(prefix,
+ CloudApi.CsObjectOrPrefixType.PREFIX)
+
+ def GetObjectMetadata(self, bucket_name, object_name, generation=None,
+ provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ projection = (apitools_messages.StorageObjectsGetRequest
+ .ProjectionValueValuesEnum.full)
+
+ if generation:
+ generation = long(generation)
+
+ apitools_request = apitools_messages.StorageObjectsGetRequest(
+ bucket=bucket_name, object=object_name, projection=projection,
+ generation=generation)
+ global_params = apitools_messages.StandardQueryParameters()
+ if fields:
+ global_params.fields = ','.join(set(fields))
+
+ try:
+ return self.api_client.objects.Get(apitools_request,
+ global_params=global_params)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
+ object_name=object_name,
+ generation=generation)
+
+ def GetObjectMedia(
+ self, bucket_name, object_name, download_stream,
+ provider=None, generation=None, object_size=None,
+ download_strategy=CloudApi.DownloadStrategy.ONE_SHOT, start_byte=0,
+ end_byte=None, progress_callback=None, serialization_data=None,
+ digesters=None):
+ """See CloudApi class for function doc strings."""
+ # This implementation will get the object metadata first if we don't pass it
+ # in via serialization_data.
+ if generation:
+ generation = long(generation)
+
+ outer_total_size = object_size
+ if serialization_data:
+ outer_total_size = json.loads(serialization_data)['total_size']
+
+ if progress_callback:
+ if outer_total_size is None:
+ raise ArgumentException('Download size is required when callbacks are '
+ 'requested for a download, but no size was '
+ 'provided.')
+ progress_callback(0, outer_total_size)
+
+ bytes_downloaded_container = BytesTransferredContainer()
+ bytes_downloaded_container.bytes_transferred = start_byte
+
+ callback_class_factory = DownloadCallbackConnectionClassFactory(
+ bytes_downloaded_container, total_size=outer_total_size,
+ progress_callback=progress_callback, digesters=digesters)
+ download_http_class = callback_class_factory.GetConnectionClass()
+
+ download_http = self._GetNewDownloadHttp(download_stream)
+ download_http.connections = {'https': download_http_class}
+ authorized_download_http = self.credentials.authorize(download_http)
+ WrapDownloadHttpRequest(authorized_download_http)
+
+ if serialization_data:
+ apitools_download = apitools_transfer.Download.FromData(
+ download_stream, serialization_data, self.api_client.http)
+ else:
+ apitools_download = apitools_transfer.Download.FromStream(
+ download_stream, auto_transfer=False, total_size=object_size)
+
+ apitools_download.bytes_http = authorized_download_http
+ apitools_request = apitools_messages.StorageObjectsGetRequest(
+ bucket=bucket_name, object=object_name, generation=generation)
+
+ try:
+ if download_strategy == CloudApi.DownloadStrategy.RESUMABLE:
+ # Disable retries in apitools. We will handle them explicitly here.
+ apitools_download.retry_func = (
+ apitools_http_wrapper.RethrowExceptionHandler)
+ return self._PerformResumableDownload(
+ bucket_name, object_name, download_stream, apitools_request,
+ apitools_download, bytes_downloaded_container,
+ generation=generation, start_byte=start_byte, end_byte=end_byte,
+ serialization_data=serialization_data)
+ else:
+ return self._PerformDownload(
+ bucket_name, object_name, download_stream, apitools_request,
+ apitools_download, generation=generation, start_byte=start_byte,
+ end_byte=end_byte, serialization_data=serialization_data)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
+ object_name=object_name,
+ generation=generation)
+
+ def _PerformResumableDownload(
+ self, bucket_name, object_name, download_stream, apitools_request,
+ apitools_download, bytes_downloaded_container, generation=None,
+ start_byte=0, end_byte=None, serialization_data=None):
+ retries = 0
+ last_progress_byte = start_byte
+ while retries <= self.num_retries:
+ try:
+ return self._PerformDownload(
+ bucket_name, object_name, download_stream, apitools_request,
+ apitools_download, generation=generation, start_byte=start_byte,
+ end_byte=end_byte, serialization_data=serialization_data)
+ except HTTP_TRANSFER_EXCEPTIONS, e:
+ start_byte = download_stream.tell()
+ bytes_downloaded_container.bytes_transferred = start_byte
+ if start_byte > last_progress_byte:
+ # We've made progress, so allow a fresh set of retries.
+ last_progress_byte = start_byte
+ retries = 0
+ retries += 1
+ if retries > self.num_retries:
+ raise ResumableDownloadException(
+ 'Transfer failed after %d retries. Final exception: %s' %
+ (self.num_retries, str(e)))
+ time.sleep(CalculateWaitForRetry(retries, max_wait=GetMaxRetryDelay()))
+ self.logger.info(
+ 'Retrying download from byte %s after exception.', start_byte)
+ apitools_http_wrapper.RebuildHttpConnections(
+ apitools_download.bytes_http)
+
+ def _PerformDownload(
+ self, bucket_name, object_name, download_stream, apitools_request,
+ apitools_download, generation=None, start_byte=0, end_byte=None,
+ serialization_data=None):
+ if not serialization_data:
+ try:
+ self.api_client.objects.Get(apitools_request,
+ download=apitools_download)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
+ object_name=object_name,
+ generation=generation)
+
+ # Disable apitools' default print callbacks.
+ def _NoopCallback(unused_response, unused_download_object):
+ pass
+
+ # TODO: If we have a resumable download with accept-encoding:gzip
+ # on an object that is compressible but not in gzip form in the cloud,
+ # on-the-fly compression will gzip the object. In this case if our
+ # download breaks, future requests will ignore the range header and just
+ # return the object (gzipped) in its entirety. Ideally, we would unzip
+ # the bytes that we have locally and send a range request without
+ # accept-encoding:gzip so that we can download only the (uncompressed) bytes
+ # that we don't yet have.
+
+ # Since bytes_http is created in this function, we don't get the
+ # user-agent header from api_client's http automatically.
+ additional_headers = {
+ 'accept-encoding': 'gzip',
+ 'user-agent': self.api_client.user_agent
+ }
+ if start_byte or end_byte:
+ apitools_download.GetRange(additional_headers=additional_headers,
+ start=start_byte, end=end_byte)
+ else:
+ apitools_download.StreamInChunks(
+ callback=_NoopCallback, finish_callback=_NoopCallback,
+ additional_headers=additional_headers)
+ return apitools_download.encoding
+
+ def PatchObjectMetadata(self, bucket_name, object_name, metadata,
+ generation=None, preconditions=None, provider=None,
+ fields=None):
+ """See CloudApi class for function doc strings."""
+ projection = (apitools_messages.StorageObjectsPatchRequest
+ .ProjectionValueValuesEnum.full)
+
+ if not preconditions:
+ preconditions = Preconditions()
+
+ if generation:
+ generation = long(generation)
+
+ apitools_request = apitools_messages.StorageObjectsPatchRequest(
+ bucket=bucket_name, object=object_name, objectResource=metadata,
+ generation=generation, projection=projection,
+ ifGenerationMatch=preconditions.gen_match,
+ ifMetagenerationMatch=preconditions.meta_gen_match)
+ global_params = apitools_messages.StandardQueryParameters()
+ if fields:
+ global_params.fields = ','.join(set(fields))
+
+ try:
+ return self.api_client.objects.Patch(apitools_request,
+ global_params=global_params)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
+ object_name=object_name,
+ generation=generation)
+
+ def _UploadObject(self, upload_stream, object_metadata, canned_acl=None,
+ size=None, preconditions=None, provider=None, fields=None,
+ serialization_data=None, tracker_callback=None,
+ progress_callback=None, apitools_strategy='simple'):
+ """Upload implementation, apitools_strategy plus gsutil Cloud API
args."""
+ ValidateDstObjectMetadata(object_metadata)
+ assert not canned_acl, 'Canned ACLs not supported by JSON API.'
+
+ bytes_uploaded_container = BytesTransferredContainer()
+
+ total_size = 0
+ if progress_callback and size:
+ total_size = size
+ progress_callback(0, size)
+
+ callback_class_factory = UploadCallbackConnectionClassFactory(
+ bytes_uploaded_container, total_size=total_size,
+ progress_callback=progress_callback)
+
+ upload_http = GetNewHttp()
+ upload_http_class = callback_class_factory.GetConnectionClass()
+ upload_http.connections = {'http': upload_http_class,
+ 'https': upload_http_class}
+
+ # Disable apitools' default print callbacks.
+ def _NoopCallback(unused_response, unused_upload_object):
+ pass
+
+ authorized_upload_http = self.credentials.authorize(upload_http)
+ WrapUploadHttpRequest(authorized_upload_http)
+ # Since bytes_http is created in this function, we don't get the
+ # user-agent header from api_client's http automatically.
+ additional_headers = {
+ 'user-agent': self.api_client.user_agent
+ }
+
+ try:
+ content_type = None
+ apitools_request = None
+ global_params = None
+ if not serialization_data:
+ # This is a new upload, set up initial upload state.
+ content_type = object_metadata.contentType
+ if not content_type:
+ content_type = DEFAULT_CONTENT_TYPE
+
+ if not preconditions:
+ preconditions = Preconditions()
+
+ apitools_request = apitools_messages.StorageObjectsInsertRequest(
+ bucket=object_metadata.bucket, object=object_metadata,
+ ifGenerationMatch=preconditions.gen_match,
+ ifMetagenerationMatch=preconditions.meta_gen_match)
+
+ global_params = apitools_messages.StandardQueryParameters()
+ if fields:
+ global_params.fields = ','.join(set(fields))
+
+ if apitools_strategy == 'simple': # One-shot upload.
+ apitools_upload = apitools_transfer.Upload(
+ upload_stream, content_type, total_size=size, auto_transfer=True)
+ apitools_upload.strategy = apitools_strategy
+ apitools_upload.bytes_http = authorized_upload_http
+
+ return self.api_client.objects.Insert(
+ apitools_request,
+ upload=apitools_upload,
+ global_params=global_params)
+ else: # Resumable upload.
+ return self._PerformResumableUpload(
+ upload_stream, authorized_upload_http, content_type, size,
+ serialization_data, apitools_strategy, apitools_request,
+ global_params, bytes_uploaded_container, tracker_callback,
+ _NoopCallback, additional_headers)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
+ object_name=object_metadata.name)
+
+ def _PerformResumableUpload(
+ self, upload_stream, authorized_upload_http, content_type, size,
+ serialization_data, apitools_strategy, apitools_request, global_params,
+ bytes_uploaded_container, tracker_callback, noop_callback, addl_headers):
+ try:
+ if serialization_data:
+ # Resuming an existing upload.
+ apitools_upload = apitools_transfer.Upload.FromData(
+ upload_stream, serialization_data, self.api_client.http)
+ apitools_upload.chunksize = _ResumableChunkSize()
+ apitools_upload.bytes_http = authorized_upload_http
+ else:
+ # New resumable upload.
+ apitools_upload = apitools_transfer.Upload(
+ upload_stream, content_type, total_size=size,
+ chunksize=_ResumableChunkSize(), auto_transfer=False)
+ apitools_upload.strategy = apitools_strategy
+ apitools_upload.bytes_http = authorized_upload_http
+ self.api_client.objects.Insert(
+ apitools_request,
+ upload=apitools_upload,
+ global_params=global_params)
+ # Disable retries in apitools. We will handle them explicitly here.
+ apitools_upload.retry_func = (
+ apitools_http_wrapper.RethrowExceptionHandler)
+
+ # If we're resuming an upload, apitools has at this point received
+ # from the server how many bytes it already has. Update our
+ # callback class with this information.
+ bytes_uploaded_container.bytes_transferred = apitools_upload.progress
+ if tracker_callback:
+ tracker_callback(json.dumps(apitools_upload.serialization_data))
+
+ retries = 0
+ last_progress_byte = apitools_upload.progress
+ while retries <= self.num_retries:
+ try:
+ # TODO: On retry, this will seek to the bytes that the server has,
+ # causing the hash to be recalculated. Make HashingFileUploadWrapper
+ # save a digest according to json_resumable_chunk_size.
+ http_response = apitools_upload.StreamInChunks(
+ callback=noop_callback, finish_callback=noop_callback,
+ additional_headers=addl_headers)
+ return self.api_client.objects.ProcessHttpResponse(
+ self.api_client.objects.GetMethodConfig('Insert'), http_response)
+ except HTTP_TRANSFER_EXCEPTIONS, e:
+ apitools_http_wrapper.RebuildHttpConnections(
+ apitools_upload.bytes_http)
+ while retries <= self.num_retries:
+ try:
+ # TODO: Simulate the refresh case in tests. Right now, our
+ # mocks are not complex enough to simulate a failure.
+ apitools_upload.RefreshResumableUploadState()
+ start_byte = apitools_upload.progress
+ bytes_uploaded_container.bytes_transferred = start_byte
+ break
+ except HTTP_TRANSFER_EXCEPTIONS, e2:
+ apitools_http_wrapper.RebuildHttpConnections(
+ apitools_upload.bytes_http)
+ retries += 1
+ if retries > self.num_retries:
+ raise ResumableUploadException(
+ 'Transfer failed after %d retries. Final exception: %s' %
+ (self.num_retries, e2))
+ time.sleep(
+ CalculateWaitForRetry(retries, max_wait=GetMaxRetryDelay()))
+ if start_byte > last_progress_byte:
+ # We've made progress, so allow a fresh set of retries.
+ last_progress_byte = start_byte
+ retries = 0
+ else:
+ retries += 1
+ if retries > self.num_retries:
+ raise ResumableUploadException(
+ 'Transfer failed after %d retries. Final exception: %s' %
+ (self.num_retries, e))
+ time.sleep(
+ CalculateWaitForRetry(retries, max_wait=GetMaxRetryDelay()))
+ self.logger.info(
+ 'Retrying upload from byte %s after exception.', start_byte)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ resumable_ex = self._TranslateApitoolsResumableUploadException(e)
+ if resumable_ex:
+ raise resumable_ex
+ else:
+ raise
+
+ def UploadObject(self, upload_stream, object_metadata, canned_acl=None,
+ size=None, preconditions=None, progress_callback=None,
+ provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ return self._UploadObject(
+ upload_stream, object_metadata, canned_acl=canned_acl,
+ size=size, preconditions=preconditions,
+ progress_callback=progress_callback, fields=fields,
+ apitools_strategy='simple')
+
+ def UploadObjectStreaming(self, upload_stream, object_metadata,
+ canned_acl=None, preconditions=None,
+ progress_callback=None, provider=None,
+ fields=None):
+ """See CloudApi class for function doc strings."""
+ # Streaming indicated by not passing a size.
+ return self._UploadObject(
+ upload_stream, object_metadata, canned_acl=canned_acl,
+ preconditions=preconditions, progress_callback=progress_callback,
+ fields=fields, apitools_strategy='simple')
+
+ def UploadObjectResumable(
+ self, upload_stream, object_metadata, canned_acl=None, preconditions=None,
+ provider=None, fields=None, size=None, serialization_data=None,
+ tracker_callback=None, progress_callback=None):
+ """See CloudApi class for function doc strings."""
+ return self._UploadObject(
+ upload_stream, object_metadata, canned_acl=canned_acl,
+ preconditions=preconditions, fields=fields, size=size,
+ serialization_data=serialization_data,
+ tracker_callback=tracker_callback, progress_callback=progress_callback,
+ apitools_strategy='resumable')
+
+ def CopyObject(self, src_bucket_name, src_obj_name, dst_obj_metadata,
+ src_generation=None, canned_acl=None, preconditions=None,
+ provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ ValidateDstObjectMetadata(dst_obj_metadata)
+ assert not canned_acl, 'Canned ACLs not supported by JSON API.'
+
+ if src_generation:
+ src_generation = long(src_generation)
+
+ if not preconditions:
+ preconditions = Preconditions()
+
+ projection = (apitools_messages.StorageObjectsCopyRequest
+ .ProjectionValueValuesEnum.full)
+ global_params = apitools_messages.StandardQueryParameters()
+ if fields:
+ global_params.fields = ','.join(set(fields))
+
+ apitools_request = apitools_messages.StorageObjectsCopyRequest(
+ sourceBucket=src_bucket_name, sourceObject=src_obj_name,
+ destinationBucket=dst_obj_metadata.bucket,
+ destinationObject=dst_obj_metadata.name,
+ projection=projection, object=dst_obj_metadata,
+ sourceGeneration=src_generation,
+ ifGenerationMatch=preconditions.gen_match,
+ ifMetagenerationMatch=preconditions.meta_gen_match)
+ try:
+ return self.api_client.objects.Copy(apitools_request,
+ global_params=global_params)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=dst_obj_metadata.bucket,
+ object_name=dst_obj_metadata.name)
+
+ def DeleteObject(self, bucket_name, object_name, preconditions=None,
+ generation=None, provider=None):
+ """See CloudApi class for function doc strings."""
+ if not preconditions:
+ preconditions = Preconditions()
+
+ if generation:
+ generation = long(generation)
+
+ apitools_request = apitools_messages.StorageObjectsDeleteRequest(
+ bucket=bucket_name, object=object_name, generation=generation,
+ ifGenerationMatch=preconditions.gen_match,
+ ifMetagenerationMatch=preconditions.meta_gen_match)
+ try:
+ return self.api_client.objects.Delete(apitools_request)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
+ object_name=object_name,
+ generation=generation)
+
+ def ComposeObject(self, src_objs_metadata, dst_obj_metadata,
+ preconditions=None, provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ ValidateDstObjectMetadata(dst_obj_metadata)
+
+ dst_obj_name = dst_obj_metadata.name
+ dst_obj_metadata.name = None
+ dst_bucket_name = dst_obj_metadata.bucket
+ dst_obj_metadata.bucket = None
+ if not dst_obj_metadata.contentType:
+ dst_obj_metadata.contentType = DEFAULT_CONTENT_TYPE
+
+ if not preconditions:
+ preconditions = Preconditions()
+
+ global_params = apitools_messages.StandardQueryParameters()
+ if fields:
+ global_params.fields = ','.join(set(fields))
+
+ src_objs_compose_request = apitools_messages.ComposeRequest(
+ sourceObjects=src_objs_metadata, destination=dst_obj_metadata)
+
+ apitools_request = apitools_messages.StorageObjectsComposeRequest(
+ composeRequest=src_objs_compose_request,
+ destinationBucket=dst_bucket_name,
+ destinationObject=dst_obj_name,
+ ifGenerationMatch=preconditions.gen_match,
+ ifMetagenerationMatch=preconditions.meta_gen_match)
+ try:
+ return self.api_client.objects.Compose(apitools_request,
+ global_params=global_params)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=dst_bucket_name,
+ object_name=dst_obj_name)
+
+ def WatchBucket(self, bucket_name, address, channel_id, token=None,
+ provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ projection = (apitools_messages.StorageObjectsWatchAllRequest
+ .ProjectionValueValuesEnum.full)
+
+ channel = apitools_messages.Channel(address=address, id=channel_id,
+ token=token, type='WEB_HOOK')
+
+ apitools_request = apitools_messages.StorageObjectsWatchAllRequest(
+ bucket=bucket_name, channel=channel, projection=projection)
+
+ global_params = apitools_messages.StandardQueryParameters()
+ if fields:
+ global_params.fields = ','.join(set(fields))
+
+ try:
+ return self.api_client.objects.WatchAll(apitools_request,
+ global_params=global_params)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
+
+ def StopChannel(self, channel_id, resource_id, provider=None):
+ """See CloudApi class for function doc strings."""
+ channel = apitools_messages.Channel(id=channel_id, resourceId=resource_id)
+ try:
+ self.api_client.channels.Stop(channel)
+ except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
+ self._TranslateExceptionAndRaise(e)
***The diff for this file has been truncated for email.***
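
[Editorial example, not gsutil code: the retry loops in _PerformResumableDownload and _PerformResumableUpload above share one idea, namely that the retry budget resets whenever the transfer makes forward progress, so only repeated failures at the same byte offset exhaust it. A minimal sketch of that pattern; FakeTransfer, NUM_RETRIES, and TOTAL_SIZE are invented for the illustration.]

import time

NUM_RETRIES = 3
TOTAL_SIZE = 10

class FakeTransfer(object):
    """Stand-in transfer that advances a few bytes per attempt, then fails."""
    def __init__(self):
        self.progress = 0
    def attempt(self):
        self.progress = min(TOTAL_SIZE, self.progress + 3)
        if self.progress < TOTAL_SIZE:
            raise IOError('connection dropped')

def transfer_with_retries(transfer):
    retries = 0
    last_progress_byte = 0
    while retries <= NUM_RETRIES:
        try:
            transfer.attempt()
            return transfer.progress
        except IOError:
            if transfer.progress > last_progress_byte:
                # Progress was made, so allow a fresh set of retries.
                last_progress_byte = transfer.progress
                retries = 0
            else:
                retries += 1
                if retries > NUM_RETRIES:
                    raise
            time.sleep(0)  # Real code backs off via CalculateWaitForRetry.

assert transfer_with_retries(FakeTransfer()) == TOTAL_SIZE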
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/gcs_json_media.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,496 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Media helper functions and classes for Google Cloud Storage JSON API."""
+
+from __future__ import absolute_import
+
+import copy
+import cStringIO
+import httplib
+import socket
+import types
+import urlparse
+
+import httplib2
+from httplib2 import parse_uri
+
+from gslib.cloud_api import BadRequestException
+from gslib.progress_callback import ProgressCallbackWithBackoff
+from gslib.third_party.storage_apitools import exceptions as apitools_exceptions
+from gslib.util import SSL_TIMEOUT
+from gslib.util import TRANSFER_BUFFER_SIZE
+
+
+class BytesTransferredContainer(object):
+ """Container class for passing number of bytes transferred to lower
layers.
+
+ For resumed transfers or connection rebuilds in the middle of a
transfer, we
+ need to rebuild the connection class with how much we've transferred so
far.
+ For uploads, we don't know the total number of bytes uploaded until we've
+ queried the server, but we need to create the connection class to pass to
+ httplib2 before we can query the server. This container object allows us
to
+ pass a reference into Upload/DownloadCallbackConnection.
+ """
+
+ def __init__(self):
+ self.__bytes_transferred = 0
+
+ @property
+ def bytes_transferred(self):
+ return self.__bytes_transferred
+
+ @bytes_transferred.setter
+ def bytes_transferred(self, value):
+ self.__bytes_transferred = value
+
+
+class UploadCallbackConnectionClassFactory(object):
+ """Creates a class that can override an httplib2 connection.
+
+ This is used to provide progress callbacks and disable dumping the upload
+ payload during debug statements. It can later be used to provide on-the-fly
+ hash digestion during upload.
+ """
+
+ def __init__(self, bytes_uploaded_container, buffer_size=TRANSFER_BUFFER_SIZE,
+ total_size=0, progress_callback=None):
+ self.bytes_uploaded_container = bytes_uploaded_container
+ self.buffer_size = buffer_size
+ self.total_size = total_size
+ self.progress_callback = progress_callback
+
+ def GetConnectionClass(self):
+ """Returns a connection class that overrides send."""
+ outer_bytes_uploaded_container = self.bytes_uploaded_container
+ outer_buffer_size = self.buffer_size
+ outer_total_size = self.total_size
+ outer_progress_callback = self.progress_callback
+
+ class UploadCallbackConnection(httplib2.HTTPSConnectionWithTimeout):
+ """Connection class override for uploads."""
+ bytes_uploaded_container = outer_bytes_uploaded_container
+ # After we instantiate this class, apitools will check with the server
+ # to find out how many bytes remain for a resumable upload. This allows
+ # us to update our progress once based on that number.
+ processed_initial_bytes = False
+ GCS_JSON_BUFFER_SIZE = outer_buffer_size
+ callback_processor = None
+ size = outer_total_size
+
+ def __init__(self, *args, **kwargs):
+ kwargs['timeout'] = SSL_TIMEOUT
+ httplib2.HTTPSConnectionWithTimeout.__init__(self, *args, **kwargs)
+
+ def send(self, data):
+ """Overrides HTTPConnection.send."""
+ if not self.processed_initial_bytes:
+ self.processed_initial_bytes = True
+ if outer_progress_callback:
+ self.callback_processor = ProgressCallbackWithBackoff(
+ outer_total_size, outer_progress_callback)
+ self.callback_processor.Progress(
+ self.bytes_uploaded_container.bytes_transferred)
+ # httplib.HTTPConnection.send accepts either a string or a file-like
+ # object (anything that implements read()).
+ if isinstance(data, basestring):
+ full_buffer = cStringIO.StringIO(data)
+ else:
+ full_buffer = data
+ partial_buffer = full_buffer.read(self.GCS_JSON_BUFFER_SIZE)
+ while partial_buffer:
+ httplib2.HTTPSConnectionWithTimeout.send(self, partial_buffer)
+ send_length = len(partial_buffer)
+ if self.callback_processor:
+ # This is the only place where gsutil has control over making a
+ # callback, but here we can't differentiate the metadata bytes
+ # (such as headers and OAuth2 refreshes) sent during an upload
+ # from the actual upload bytes, so we will actually report
+ # slightly more bytes than desired to the callback handler.
+ #
+ # One considered/rejected alternative is to move the callbacks
+ # into the HashingFileUploadWrapper which only processes reads on
+ # the bytes. This has the disadvantages of being removed from
+ # where we actually send the bytes and unnecessarily
+ # multi-purposing that class.
+ self.callback_processor.Progress(send_length)
+ partial_buffer = full_buffer.read(self.GCS_JSON_BUFFER_SIZE)
+
+ return UploadCallbackConnection
+
+
+def WrapUploadHttpRequest(upload_http):
+ """Wraps upload_http so we only use our custom connection_type on PUTs.
+
+ POSTs are used to refresh oauth tokens, and we don't want to process the
+ data sent in those requests.
+
+ Args:
+ upload_http: httplib2.Http instance to wrap
+ """
+ request_orig = upload_http.request
+ def NewRequest(uri, method='GET', body=None, headers=None,
+ redirections=httplib2.DEFAULT_MAX_REDIRECTS,
+ connection_type=None):
+ if method == 'PUT' or method == 'POST':
+ override_connection_type = connection_type
+ else:
+ override_connection_type = None
+ return request_orig(uri, method=method, body=body,
+ headers=headers, redirections=redirections,
+ connection_type=override_connection_type)
+ # Replace the request method with our own closure.
+ upload_http.request = NewRequest
+
+
+class DownloadCallbackConnectionClassFactory(object):
+ """Creates a class that can override an httplib2 connection.
+
+ This is used to provide progress callbacks, disable dumping the download
+ payload during debug statements, and provide on-the-fly hash digestion during
+ download. On-the-fly digestion is particularly important because httplib2
+ will decompress gzipped content on-the-fly, thus this class provides our
+ only opportunity to calculate the correct hash for an object that has a
+ gzip hash in the cloud.
+ """
+
+ def __init__(self, bytes_downloaded_container,
+ buffer_size=TRANSFER_BUFFER_SIZE, total_size=0,
+ progress_callback=None, digesters=None):
+ self.buffer_size = buffer_size
+ self.total_size = total_size
+ self.progress_callback = progress_callback
+ self.digesters = digesters
+ self.bytes_downloaded_container = bytes_downloaded_container
+
+ def GetConnectionClass(self):
+ """Returns a connection class that overrides getresponse."""
+
+ class DownloadCallbackConnection(httplib2.HTTPSConnectionWithTimeout):
+ """Connection class override for downloads."""
+ outer_total_size = self.total_size
+ outer_digesters = self.digesters
+ outer_progress_callback = self.progress_callback
+ outer_bytes_downloaded_container = self.bytes_downloaded_container
+ processed_initial_bytes = False
+ callback_processor = None
+
+ def __init__(self, *args, **kwargs):
+ kwargs['timeout'] = SSL_TIMEOUT
+ httplib2.HTTPSConnectionWithTimeout.__init__(self, *args, **kwargs)
+
+ def getresponse(self, buffering=False):
+ """Wraps an HTTPResponse to perform callbacks and hashing.
+
+ In this function, self is a DownloadCallbackConnection.
+
+ Args:
+ buffering: Unused. This function uses a local buffer.
+
+ Returns:
+ HTTPResponse object with wrapped read function.
+ """
+ orig_response = httplib.HTTPConnection.getresponse(self)
+ if orig_response.status not in (httplib.OK, httplib.PARTIAL_CONTENT):
+ return orig_response
+ orig_read_func = orig_response.read
+
+ def read(amt=None): # pylint: disable=invalid-name
+ """Overrides HTTPConnection.getresponse.read.
+
+ This function only supports reads of TRANSFER_BUFFER_SIZE or smaller.
+
+ Args:
+ amt: Integer n where 0 < n <= TRANSFER_BUFFER_SIZE. This is a
+ keyword argument to match the read function it overrides,
+ but it is required.
+
+ Returns:
+ Data read from HTTPConnection.
+ """
+ if not amt or amt > TRANSFER_BUFFER_SIZE:
+ raise BadRequestException(
+ 'Invalid HTTP read size %s during download, expected %s.' %
+ (amt, TRANSFER_BUFFER_SIZE))
+ else:
+ amt = amt or TRANSFER_BUFFER_SIZE
+
+ if not self.processed_initial_bytes:
+ self.processed_initial_bytes = True
+ if self.outer_progress_callback:
+ self.callback_processor = ProgressCallbackWithBackoff(
+ self.outer_total_size, self.outer_progress_callback)
+ self.callback_processor.Progress(
+ self.outer_bytes_downloaded_container.bytes_transferred)
+
+ data = orig_read_func(amt)
+ read_length = len(data)
+ if self.callback_processor:
+ self.callback_processor.Progress(read_length)
+ if self.outer_digesters:
+ for alg in self.outer_digesters:
+ self.outer_digesters[alg].update(data)
+ return data
+ orig_response.read = read
+
+ return orig_response
+ return DownloadCallbackConnection
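
[Editorial example, not part of this change: the read() override above is what lets gsutil hash decompressed bytes as they stream through the connection. The sketch below shows the same idea stripped of the HTTP machinery; HashingReader is an invented name, not a gsutil class.]

import hashlib
from io import BytesIO

class HashingReader(object):
    """Wraps a file-like object so every read() also updates digesters."""
    def __init__(self, stream, digesters):
        self._stream = stream
        self._digesters = digesters
    def read(self, amt=None):
        data = self._stream.read(amt)
        for alg in self._digesters:
            self._digesters[alg].update(data)
        return data

digesters = {'md5': hashlib.md5()}
reader = HashingReader(BytesIO(b'hello world'), digesters)
while reader.read(4):
    pass
assert digesters['md5'].hexdigest() == hashlib.md5(b'hello world').hexdigest()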
+
+
+def WrapDownloadHttpRequest(download_http):
+ """Overrides download request functions for an httplib2.Http object.
+
+ Args:
+ download_http: httplib2.Http.object to wrap / override.
+
+ Returns:
+ Wrapped / overridden httplib2.Http object.
+ """
+
+ # httplib2 has a bug https://code.google.com/p/httplib2/issues/detail?id=305
+ # where custom connection_type is not respected after redirects. This
+ # function is copied from httplib2 and overrides the request function so that
+ # the connection_type is properly passed through.
+ # pylint: disable=protected-access,g-inconsistent-quotes,unused-variable
+ # pylint: disable=g-equals-none,g-doc-return-or-yield
+ # pylint: disable=g-short-docstring-punctuation,g-doc-args
+ # pylint: disable=too-many-statements
+ def OverrideRequest(self, conn, host, absolute_uri, request_uri, method,
+ body, headers, redirections, cachekey):
+ """Do the actual request using the connection object.
+
+ Also follow one level of redirects if necessary.
+ """
+
+ auths = ([(auth.depth(request_uri), auth) for auth in self.authorizations
+ if auth.inscope(host, request_uri)])
+ auth = auths and sorted(auths)[0][1] or None
+ if auth:
+ auth.request(method, request_uri, headers, body)
+
+ (response, content) = self._conn_request(conn, request_uri, method, body,
+ headers)
+
+ if auth:
+ if auth.response(response, body):
+ auth.request(method, request_uri, headers, body)
+ (response, content) = self._conn_request(conn, request_uri, method,
+ body, headers)
+ response._stale_digest = 1
+
+ if response.status == 401:
+ for authorization in self._auth_from_challenge(
+ host, request_uri, headers, response, content):
+ authorization.request(method, request_uri, headers, body)
+ (response, content) = self._conn_request(conn, request_uri, method,
+ body, headers)
+ if response.status != 401:
+ self.authorizations.append(authorization)
+ authorization.response(response, body)
+ break
+
+ if (self.follow_all_redirects or (method in ["GET", "HEAD"])
+ or response.status == 303):
+ if self.follow_redirects and response.status in [300, 301, 302,
+ 303, 307]:
+ # Pick out the location header and basically start from the beginning
+ # remembering first to strip the ETag header and decrement our 'depth'
+ if redirections:
+ if not response.has_key('location') and response.status != 300:
+ raise httplib2.RedirectMissingLocation(
+ "Redirected but the response is missing a Location:
header.",
+ response, content)
+ # Fix-up relative redirects (which violate an RFC 2616 MUST)
+ if response.has_key('location'):
+ location = response['location']
+ (scheme, authority, path, query, fragment) = parse_uri(location)
+ if authority == None:
+ response['location'] = urlparse.urljoin(absolute_uri, location)
+ if response.status == 301 and method in ["GET", "HEAD"]:
+ response['-x-permanent-redirect-url'] = response['location']
+ if not response.has_key('content-location'):
+ response['content-location'] = absolute_uri
+ httplib2._updateCache(headers, response, content, self.cache,
+ cachekey)
+ if headers.has_key('if-none-match'):
+ del headers['if-none-match']
+ if headers.has_key('if-modified-since'):
+ del headers['if-modified-since']
+ if ('authorization' in headers and
+ not self.forward_authorization_headers):
+ del headers['authorization']
+ if response.has_key('location'):
+ location = response['location']
+ old_response = copy.deepcopy(response)
+ if not old_response.has_key('content-location'):
+ old_response['content-location'] = absolute_uri
+ redirect_method = method
+ if response.status in [302, 303]:
+ redirect_method = "GET"
+ body = None
+ (response, content) = self.request(
+ location, redirect_method, body=body, headers=headers,
+ redirections=redirections-1,
+ connection_type=conn.__class__)
+ response.previous = old_response
+ else:
+ raise httplib2.RedirectLimit(
+ "Redirected more times than redirection_limit allows.",
+ response, content)
+ elif response.status in [200, 203] and method in ["GET", "HEAD"]:
+ # Don't cache 206's since we aren't going to handle byte range
+ # requests
+ if not response.has_key('content-location'):
+ response['content-location'] = absolute_uri
+ httplib2._updateCache(headers, response, content, self.cache,
+ cachekey)
+
+ return (response, content)
+
+ # Wrap download_http so we do not use our custom connection_type
+ # on POSTS, which are used to refresh oauth tokens. We don't want to
+ # process the data received in those requests.
+ request_orig = download_http.request
+ def NewRequest(uri, method='GET', body=None, headers=None,
+ redirections=httplib2.DEFAULT_MAX_REDIRECTS,
+ connection_type=None):
+ if method == 'POST':
+ return request_orig(uri, method=method, body=body,
+ headers=headers, redirections=redirections,
+ connection_type=None)
+ else:
+ return request_orig(uri, method=method, body=body,
+ headers=headers, redirections=redirections,
+ connection_type=connection_type)
+
+ # Replace the request methods with our own closures.
+ download_http._request = types.MethodType(OverrideRequest, download_http)
+ download_http.request = NewRequest
+
+ return download_http
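
[Editorial example, not gsutil code: WrapUploadHttpRequest and the NewRequest closure above both use the same technique of capturing the original request method and substituting a wrapper that conditionally overrides connection_type. A generic sketch, assuming httplib2 is installed; force_default_connection_on_post is an invented name.]

import httplib2

def force_default_connection_on_post(http):
    """Wraps http.request so POSTs never use a custom connection class."""
    request_orig = http.request
    def new_request(uri, method='GET', body=None, headers=None,
                    redirections=httplib2.DEFAULT_MAX_REDIRECTS,
                    connection_type=None):
        if method == 'POST':
            connection_type = None  # Fall back to httplib2's default class.
        return request_orig(uri, method=method, body=body, headers=headers,
                            redirections=redirections,
                            connection_type=connection_type)
    http.request = new_request
    return http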
+
+
+class HttpWithDownloadStream(httplib2.Http):
+ """httplib2.Http variant that only pushes bytes through a stream.
+
+ httplib2 handles media by storing entire chunks of responses in memory, which
+ is undesirable particularly when multiple instances are used during
+ multi-threaded/multi-process copy. This class copies and then overrides some
+ httplib2 functions to use a streaming copy approach that uses small memory
+ buffers.
+ """
+
+ def __init__(self, stream=None, *args, **kwds):
+ if stream is None:
+ raise apitools_exceptions.InvalidUserInputError(
+ 'Cannot create HttpWithDownloadStream with no stream')
+ self._stream = stream
+ super(HttpWithDownloadStream, self).__init__(*args, **kwds)
+
+ @property
+ def stream(self):
+ return self._stream
+
+ # pylint: disable=too-many-statements
+ def _conn_request(self, conn, request_uri, method, body, headers):
+ i = 0
+ seen_bad_status_line = False
+ while i < httplib2.RETRIES:
+ i += 1
+ try:
+ if hasattr(conn, 'sock') and conn.sock is None:
+ conn.connect()
+ conn.request(method, request_uri, body, headers)
+ except socket.timeout:
+ raise
+ except socket.gaierror:
+ conn.close()
+ raise httplib2.ServerNotFoundError(
+ 'Unable to find the server at %s' % conn.host)
+ except httplib2.ssl_SSLError:
+ conn.close()
+ raise
+ except socket.error, e:
+ err = 0
+ if hasattr(e, 'args'):
+ err = getattr(e, 'args')[0]
+ else:
+ err = e.errno
+ if err == httplib2.errno.ECONNREFUSED: # Connection refused
+ raise
+ except httplib.HTTPException:
+ # Just because the server closed the connection doesn't apparently mean
+ # that the server didn't send a response.
+ if hasattr(conn, 'sock') and conn.sock is None:
+ if i < httplib2.RETRIES-1:
+ conn.close()
+ conn.connect()
+ continue
+ else:
+ conn.close()
+ raise
+ if i < httplib2.RETRIES-1:
+ conn.close()
+ conn.connect()
+ continue
+ try:
+ response = conn.getresponse()
+ except httplib.BadStatusLine:
+ # If we get a BadStatusLine on the first try then that means
+ # the connection just went stale, so retry regardless of the
+ # number of RETRIES set.
+ if not seen_bad_status_line and i == 1:
+ i = 0
+ seen_bad_status_line = True
+ conn.close()
+ conn.connect()
+ continue
+ else:
+ conn.close()
+ raise
+ except (socket.error, httplib.HTTPException):
+ if i < httplib2.RETRIES-1:
+ conn.close()
+ conn.connect()
+ continue
+ else:
+ conn.close()
+ raise
+ else:
+ content = ''
+ if method == 'HEAD':
+ conn.close()
+ response = httplib2.Response(response)
+ else:
+ if response.status in (httplib.OK, httplib.PARTIAL_CONTENT):
+ http_stream = response
+ # Start last_position and new_position at dummy values
+ last_position = -1
+ new_position = 0
+ while new_position != last_position:
+ last_position = new_position
+ new_data = http_stream.read(TRANSFER_BUFFER_SIZE)
+ self.stream.write(new_data)
+ new_position += len(new_data)
+ response = httplib2.Response(response)
+ else:
+ # We fall back to the current httplib2 behavior if we're
+ # not processing bytes (eg it's a redirect).
+ content = response.read()
+ response = httplib2.Response(response)
+ # pylint: disable=protected-access
+ content = httplib2._decompressContent(response, content)
+ break
+ return (response, content)
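
For illustration, a minimal usage sketch of HttpWithDownloadStream (not part of this change; the URL and local path are placeholders): because _conn_request writes 200/206 response bodies to the wrapped stream in TRANSFER_BUFFER_SIZE chunks, the returned content stays empty and memory use stays bounded.

  with open('/tmp/object.bin', 'wb') as fp:
    http = HttpWithDownloadStream(stream=fp)
    # 2xx bodies are written to fp as they arrive; for other statuses
    # (e.g. redirects) the normal httplib2 buffering behavior applies.
    response, content = http.request('https://storage.example.com/obj', 'GET')
    print response.status, len(content)
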
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/hashing_helper.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,405 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Helper functions for hashing functionality."""
+
+import base64
+import binascii
+from hashlib import md5
+import os
+
+from boto import config
+import crcmod
+
+from gslib.commands.config import CHECK_HASH_ALWAYS
+from gslib.commands.config import CHECK_HASH_IF_FAST_ELSE_FAIL
+from gslib.commands.config import CHECK_HASH_IF_FAST_ELSE_SKIP
+from gslib.commands.config import CHECK_HASH_NEVER
+from gslib.exception import CommandException
+from gslib.util import DEFAULT_FILE_BUFFER_SIZE
+from gslib.util import MIN_SIZE_COMPUTE_LOGGING
+from gslib.util import TRANSFER_BUFFER_SIZE
+from gslib.util import UsingCrcmodExtension
+
+
+SLOW_CRCMOD_WARNING = """
+WARNING: You have requested checksumming but your crcmod installation isn't
+using the module's C extension, so checksumming will run very slowly. For help
+installing the extension, please see:
+ $ gsutil help crcmod
+"""
+
+
+_SLOW_CRCMOD_DOWNLOAD_WARNING = """
+WARNING: Downloading this composite object requires integrity checking with
+CRC32c, but your crcmod installation isn't using the module's C extension,
+so the hash computation will likely throttle download performance. For help
+installing the extension, please see:
+ $ gsutil help crcmod
+To disable slow integrity checking, see the "check_hashes" option in your
+boto config file.
+"""
+
+_SLOW_CRC_EXCEPTION_TEXT = """
+Downloading this composite object requires integrity checking with CRC32c,
+but your crcmod installation isn't using the module's C extension, so the
+hash computation will likely throttle download performance. For help
+installing the extension, please see:
+
+ $ gsutil help crcmod
+
+To download regardless of crcmod performance or to skip slow integrity
+checks, see the "check_hashes" option in your boto config file.
+
+NOTE: It is strongly recommended that you not disable integrity checks. Doing so
+could allow data corruption to go undetected during uploading/downloading."""
+
+
+_NO_HASH_CHECK_WARNING = """
+WARNING: This download will not be validated since your crcmod installation
+doesn't use the module's C extension, so the hash computation would likely
+throttle download performance. For help in installing the extension, please
+see:
+ $ gsutil help crcmod
+To force integrity checking, see the "check_hashes" option in your boto config
+file.
+"""
+
+
+def _CalculateHashFromContents(fp, hash_alg):
+ """Calculates a hash digest of the contents of a seekable stream.
+
+ This function resets the file pointer to position 0.
+
+ Args:
+ fp: An already-open file object.
+ hash_alg: Instance of hashing class initialized to start state.
+
+ Returns:
+ Hash of the stream in hex string format.
+ """
+ hash_dict = {'placeholder': hash_alg}
+ fp.seek(0)
+ CalculateHashesFromContents(fp, hash_dict)
+ fp.seek(0)
+ return hash_dict['placeholder'].hexdigest()
+
+
+def CalculateHashesFromContents(fp, hash_dict, callback_processor=None):
+ """Calculates hashes of the contents of a file.
+
+ Args:
+ fp: An already-open file object (stream will be consumed).
+ hash_dict: Dict of (string alg_name: initialized hashing class)
+ Hashing class will be populated with digests upon return.
+ callback_processor: Optional callback processing class that implements
+ Progress(integer amount of bytes processed).
+ """
+ while True:
+ data = fp.read(DEFAULT_FILE_BUFFER_SIZE)
+ if not data:
+ break
+ for hash_alg in hash_dict.itervalues():
+ hash_alg.update(data)
+ if callback_processor:
+ callback_processor.Progress(len(data))
+
+
+def CalculateB64EncodedCrc32cFromContents(fp):
+ """Calculates a base64 CRC32c checksum of the contents of a seekable stream.
+
+ This function sets the stream position 0 before and after calculation.
+
+ Args:
+ fp: An already-open file object.
+
+ Returns:
+ CRC32c checksum of the file in base64 format.
+ """
+ return _CalculateB64EncodedHashFromContents(
+ fp, crcmod.predefined.Crc('crc-32c'))
+
+
+def CalculateB64EncodedMd5FromContents(fp):
+ """Calculates a base64 MD5 digest of the contents of a seekable stream.
+
+ This function sets the stream position 0 before and after calculation.
+
+ Args:
+ fp: An already-open file object.
+
+ Returns:
+ MD5 digest of the file in base64 format.
+ """
+ return _CalculateB64EncodedHashFromContents(fp, md5())
+
+
+def CalculateMd5FromContents(fp):
+ """Calculates an MD5 digest of the contents of a seekable stream.
+
+ This function sets the stream position 0 before and after calculation.
+
+ Args:
+ fp: An already-open file object.
+
+ Returns:
+ MD5 digest of the file in hex format.
+ """
+ return _CalculateHashFromContents(fp, md5())
+
+
+def Base64EncodeHash(digest_value):
+ """Returns the base64-encoded version of the input hex digest value."""
+ return base64.encodestring(binascii.unhexlify(digest_value)).rstrip('\n')
+
+
+def _CalculateB64EncodedHashFromContents(fp, hash_alg):
+ """Calculates a base64 digest of the contents of a seekable stream.
+
+ This function sets the stream position 0 before and after calculation.
+
+ Args:
+ fp: An already-open file object.
+ hash_alg: Instance of hashing class initialized to start state.
+
+ Returns:
+ Hash of the stream in base64 format.
+ """
+ return Base64EncodeHash(_CalculateHashFromContents(fp, hash_alg))
+
+
+def GetUploadHashAlgs():
+ """Returns a dict of hash algorithms for validating an uploaded object.
+
+ This is for use only with single object uploads, not compose operations
+ such as those used by parallel composite uploads (though it can be used to
+ validate the individual components).
+
+ Returns:
+ dict of (algorithm_name: hash_algorithm)
+ """
+ check_hashes_config = config.get(
+ 'GSUtil', 'check_hashes', CHECK_HASH_IF_FAST_ELSE_FAIL)
+ if check_hashes_config == 'never':
+ return {}
+ return {'md5': md5}
+
+
+def GetDownloadHashAlgs(logger, src_has_md5=False, src_has_crc32c=False):
+ """Returns a dict of hash algorithms for validating an object.
+
+ Args:
+ logger: logging.Logger for outputting log messages.
+ src_has_md5: If True, source object has an md5 hash.
+ src_has_crc32c: If True, source object has a crc32c hash.
+
+ Returns:
+ Dict of (string, hash algorithm).
+
+ Raises:
+ CommandException if hash algorithms satisfying the boto config file
+ cannot be returned.
+ """
+ check_hashes_config = config.get(
+ 'GSUtil', 'check_hashes', CHECK_HASH_IF_FAST_ELSE_FAIL)
+ if check_hashes_config == CHECK_HASH_NEVER:
+ return {}
+
+ hash_algs = {}
+ if src_has_md5:
+ hash_algs['md5'] = md5
+ elif src_has_crc32c:
+ # If the cloud provider supplies a CRC, we'll compute a checksum to
+ # validate if we're using a native crcmod installation and MD5 isn't
+ # offered as an alternative.
+ if UsingCrcmodExtension(crcmod):
+ hash_algs['crc32c'] = lambda: crcmod.predefined.Crc('crc-32c')
+ elif not hash_algs:
+ if check_hashes_config == CHECK_HASH_IF_FAST_ELSE_FAIL:
+ raise CommandException(_SLOW_CRC_EXCEPTION_TEXT)
+ elif check_hashes_config == CHECK_HASH_IF_FAST_ELSE_SKIP:
+ logger.warn(_NO_HASH_CHECK_WARNING)
+ elif check_hashes_config == CHECK_HASH_ALWAYS:
+ logger.warn(_SLOW_CRCMOD_DOWNLOAD_WARNING)
+ hash_algs['crc32c'] = lambda: crcmod.predefined.Crc('crc-32c')
+ else:
+ raise CommandException(
+ 'Your boto config \'check_hashes\' option is misconfigured.')
+
+ return hash_algs
+
+
+class HashingFileUploadWrapper(object):
+ """Wraps an input stream in a hash digester and exposes a stream interface.
+
+ This class provides integrity checking during file uploads via the
+ following properties:
+
+ Calls to read will appropriately update digesters with all bytes read.
+ Calls to seek (assuming it is supported by the wrapped stream) using
+ os.SEEK_SET will catch up / reset the digesters to the specified
+ position. If seek is called with a different os.SEEK mode, the caller
+ must return to the original position using os.SEEK_SET before further
+ reads.
+ Calls to seek are fast if the desired position is equal to the position at
+ the beginning of the last read call (we only need to re-hash bytes
+ from that point on).
+ """
+
+ def __init__(self, stream, digesters, hash_algs, src_url, logger):
+ """Initializes the wrapper.
+
+ Args:
+ stream: Input stream.
+ digesters: dict of {string: hash digester} containing digesters, where
+ string is the name of the hash algorithm.
+ hash_algs: dict of {string: hash algorithm} for resetting and
+ recalculating digesters. String is the name of the hash algorithm.
+ src_url: Source FileUrl that is being copied.
+ logger: For outputting log messages.
+ """
+ if not digesters:
+ raise CommandException('HashingFileUploadWrapper used with no digesters.')
+ elif not hash_algs:
+ raise CommandException('HashingFileUploadWrapper used with no hash_algs.')
+
+ self._orig_fp = stream
+ self._digesters = digesters
+ self._src_url = src_url
+ self._logger = logger
+ self._seek_away = None
+
+ self._digesters_previous = {}
+ for alg in self._digesters:
+ self._digesters_previous[alg] = self._digesters[alg].copy()
+ self._digesters_previous_mark = 0
+ self._digesters_current_mark = 0
+ self._hash_algs = hash_algs
+
+ def read(self, size=-1): # pylint: disable=invalid-name
+ """Reads from the wrapped file pointer and calculates hash digests.
+
+ Args:
+ size: The number of bytes to read. If omitted or negative, the entire
+ contents of the file will be read, hashed, and returned.
+
+ Returns:
+ Bytes from the wrapped stream.
+
+ Raises:
+ CommandException if the position of the wrapped stream is unknown.
+ """
+ if self._seek_away is not None:
+ raise CommandException('Read called on hashing file pointer in an '
+ 'unknown position; cannot correctly compute '
+ 'digest.')
+
+ data = self._orig_fp.read(size)
+ self._digesters_previous_mark = self._digesters_current_mark
+ for alg in self._digesters:
+ self._digesters_previous[alg] = self._digesters[alg].copy()
+ if len(data) >= MIN_SIZE_COMPUTE_LOGGING:
+ self._logger.info('Catching up %s for %s...', alg,
+ self._src_url.url_string)
+ self._digesters[alg].update(data)
+ self._digesters_current_mark += len(data)
+ return data
+
+ def tell(self): # pylint: disable=invalid-name
+ """Returns the current stream position."""
+ return self._orig_fp.tell()
+
+ def seekable(self): # pylint: disable=invalid-name
+ """Returns true if the stream is seekable."""
+ return self._orig_fp.seekable()
+
+ def seek(self, offset, whence=os.SEEK_SET): # pylint: disable=invalid-name
+ """Seeks in the wrapped file pointer and catches up hash digests.
+
+ Args:
+ offset: The offset to seek to.
+ whence: os.SEEK_SET, os.SEEK_CUR, or os.SEEK_END.
+
+ Returns:
+ Return value from the wrapped stream's seek call.
+ """
+ if whence != os.SEEK_SET:
+ # We do not catch up hashes for non-absolute seeks, and rely on the
+ # caller to seek to an absolute position before reading.
+ self._seek_away = self._orig_fp.tell()
+
+ else:
+ # Hashes will be correct and it's safe to call read().
+ self._seek_away = None
+ if offset < self._digesters_previous_mark:
+ # This is earlier than our earliest saved digest, so we need to
+ # reset the digesters and scan from the beginning.
+ for alg in self._digesters:
+ self._digesters[alg] = self._hash_algs[alg]()
+ self._digesters_current_mark = 0
+ self._orig_fp.seek(0)
+ self._CatchUp(offset)
+
+ elif offset == self._digesters_previous_mark:
+ # Just load the saved digests.
+ self._digesters_current_mark = self._digesters_previous_mark
+ for alg in self._digesters:
+ self._digesters[alg] = self._digesters_previous[alg]
+
+ elif offset < self._digesters_current_mark:
+ # Reset the position to our previous digest and scan forward.
+ self._digesters_current_mark = self._digesters_previous_mark
+ for alg in self._digesters:
+ self._digesters[alg] = self._digesters_previous[alg]
+ self._orig_fp.seek(self._digesters_previous_mark)
+ self._CatchUp(offset - self._digesters_previous_mark)
+
+ else:
+ # Scan forward from our current digest and position.
+ self._orig_fp.seek(self._digesters_current_mark)
+ self._CatchUp(offset - self._digesters_current_mark)
+
+ return self._orig_fp.seek(offset, whence)
+
+ def _CatchUp(self, bytes_to_read):
+ """Catches up hashes, but does not return data and uses little memory.
+
+ Before calling this function, digesters_current_mark should be updated
+ to the current location of the original stream and the self._digesters
+ should be current to that point (but no further).
+
+ Args:
+ bytes_to_read: Number of bytes to catch up from the original stream.
+ """
+ if self._orig_fp.tell() != self._digesters_current_mark:
+ raise CommandException(
+ 'Invalid mark when catching up hashes. Stream position %s, hash '
+ 'position %s' % (self._orig_fp.tell(), self._digesters_current_mark))
+
+ for alg in self._digesters:
+ if bytes_to_read >= MIN_SIZE_COMPUTE_LOGGING:
+ self._logger.info('Catching up %s for %s...', alg,
+ self._src_url.url_string)
+ self._digesters_previous[alg] = self._digesters[alg].copy()
+
+ self._digesters_previous_mark = self._digesters_current_mark
+ bytes_remaining = bytes_to_read
+ bytes_this_round = min(bytes_remaining, TRANSFER_BUFFER_SIZE)
+ while bytes_this_round:
+ data = self._orig_fp.read(bytes_this_round)
+ bytes_remaining -= bytes_this_round
+ for alg in self._digesters:
+ self._digesters[alg].update(data)
+ bytes_this_round = min(bytes_remaining, TRANSFER_BUFFER_SIZE)
+ self._digesters_current_mark += bytes_to_read
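
For illustration, a minimal sketch of the module-level helpers above (not part of this change; the literal contents are a placeholder and cStringIO stands in for a real file object). Both helpers reset the stream position, so the same object can be hashed twice:

  import cStringIO
  from gslib.hashing_helper import Base64EncodeHash
  from gslib.hashing_helper import CalculateB64EncodedMd5FromContents
  from gslib.hashing_helper import CalculateMd5FromContents

  fp = cStringIO.StringIO('example contents')
  hex_digest = CalculateMd5FromContents(fp)             # hex-encoded MD5
  b64_digest = CalculateB64EncodedMd5FromContents(fp)   # same digest, base64-encoded
  assert Base64EncodeHash(hex_digest) == b64_digest
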
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/ls_helper.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,261 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Utility functions and class for listing commands such as ls and du."""
+
+from __future__ import absolute_import
+
+import fnmatch
+
+from gslib.exception import CommandException
+from gslib.plurality_checkable_iterator import PluralityCheckableIterator
+from gslib.util import UTF8
+from gslib.wildcard_iterator import StorageUrlFromString
+
+
+def PrintNewLine():
+ """Default function for printing new lines between directories."""
+ print
+
+
+def PrintDirHeader(bucket_listing_ref):
+ """Default function for printing headers for prefixes.
+
+ Header is printed prior to listing the contents of the prefix.
+
+ Args:
+ bucket_listing_ref: BucketListingRef of type PREFIX.
+ """
+ print '%s:' % bucket_listing_ref.url_string.encode(UTF8)
+
+
+def PrintBucketHeader(bucket_listing_ref): # pylint: disable=unused-argument
+ """Default function for printing headers for buckets.
+
+ Header is printed prior to listing the contents of the bucket.
+
+ Args:
+ bucket_listing_ref: BucketListingRef of type BUCKET.
+ """
+ pass
+
+
+def PrintDir(bucket_listing_ref):
+ """Default function for printing buckets or prefixes.
+
+ Args:
+ bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
+ """
+ print bucket_listing_ref.url_string.encode(UTF8)
+
+
+# pylint: disable=unused-argument
+def PrintDirSummary(num_bytes, bucket_listing_ref):
+ """Off-by-default function for printing buckets or prefix size summaries.
+
+ Args:
+ num_bytes: Number of bytes contained in the directory.
+ bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
+ """
+ pass
+
+
+def PrintObject(bucket_listing_ref):
+ """Default printing function for objects.
+
+ Args:
+ bucket_listing_ref: BucketListingRef of type OBJECT.
+
+ Returns:
+ (num_objects, num_bytes).
+ """
+ print bucket_listing_ref.url_string.encode(UTF8)
+ return (1, 0)
+
+
+class LsHelper(object):
+ """Helper class for ls and du."""
+
+ def __init__(self, iterator_func, logger,
+ print_object_func=PrintObject,
+ print_dir_func=PrintDir,
+ print_dir_header_func=PrintDirHeader,
+ print_bucket_header_func=PrintBucketHeader,
+ print_dir_summary_func=PrintDirSummary,
+ print_newline_func=PrintNewLine,
+ all_versions=False, should_recurse=False,
+ exclude_patterns=None, fields=('name',)):
+ """Initializes the helper class to prepare for listing.
+
+ Args:
+ iterator_func: Function for instantiating iterator.
+ Inputs-
+ url_string- Url string to iterate on. May include
+ wildcards.
+ all_versions=False- If true, iterate over all object
+ versions.
+ logger: Logger for outputting warnings / errors.
+ print_object_func: Function for printing objects.
+ print_dir_func: Function for printing buckets/prefixes.
+ print_dir_header_func: Function for printing header line for buckets
+ or prefixes.
+ print_bucket_header_func: Function for printing header line for buckets
+ or prefixes.
+ print_dir_summary_func: Function for printing size summaries about
+ buckets/prefixes.
+ print_newline_func: Function for printing new lines between dirs.
+ all_versions: If true, list all object versions.
+ should_recurse: If true, recursively list buckets/prefixes.
+ exclude_patterns: Patterns to exclude when listing.
+ fields: Fields to request from bucket listings; this should
+ include all fields that need to be populated in
+ objects so they can be listed. Can be set to None
+ to retrieve all object fields. Defaults to short
+ listing fields.
+ """
+ self._iterator_func = iterator_func
+ self.logger = logger
+ self._print_object_func = print_object_func
+ self._print_dir_func = print_dir_func
+ self._print_dir_header_func = print_dir_header_func
+ self._print_bucket_header_func = print_bucket_header_func
+ self._print_dir_summary_func = print_dir_summary_func
+ self._print_newline_func = print_newline_func
+ self.all_versions = all_versions
+ self.should_recurse = should_recurse
+ self.exclude_patterns = exclude_patterns
+ self.bucket_listing_fields = fields
+
+ def ExpandUrlAndPrint(self, url):
+ """Iterates over the given URL and calls print functions.
+
+ Args:
+ url: StorageUrl to iterate over.
+
+ Returns:
+ (num_dirs, num_objects, num_bytes) total number of directories, objects,
+ and bytes iterated.
+ """
+ num_objects = 0
+ num_dirs = 0
+ num_bytes = 0
+ print_newline = False
+
+ if url.IsBucket() or self.should_recurse:
+ # IsBucket() implies a top-level listing.
+ if url.IsBucket():
+ self._print_bucket_header_func(url)
+ return self._RecurseExpandUrlAndPrint(url.url_string,
+ print_initial_newline=False)
+ else:
+ # User provided a prefix or object URL, but it's impossible to tell
+ # which until we do a listing and see what matches.
+ top_level_iterator = PluralityCheckableIterator(self._iterator_func(
+ url.CreatePrefixUrl(wildcard_suffix=None),
+ all_versions=self.all_versions).IterAll(
+ expand_top_level_buckets=True,
+ bucket_listing_fields=self.bucket_listing_fields))
+ plurality = top_level_iterator.HasPlurality()
+
+ for blr in top_level_iterator:
+ if self._MatchesExcludedPattern(blr):
+ continue
+ if blr.IsObject():
+ nd = 0
+ no, nb = self._print_object_func(blr)
+ print_newline = True
+ elif blr.IsPrefix():
+ if print_newline:
+ self._print_newline_func()
+ else:
+ print_newline = True
+ if plurality:
+ self._print_dir_header_func(blr)
+ expansion_url_str = StorageUrlFromString(
+ blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
+ nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
+ self._print_dir_summary_func(nb, blr)
+ else:
+ # We handle all buckets at the top level, so this should never happen.
+ raise CommandException(
+ 'Sub-level iterator returned a CsBucketListingRef of type Bucket')
+ num_objects += no
+ num_dirs += nd
+ num_bytes += nb
+ return num_dirs, num_objects, num_bytes
+
+ def _RecurseExpandUrlAndPrint(self, url_str, print_initial_newline=True):
+ """Iterates over the given URL string and calls print functions.
+
+ Args:
+ url_str: String describing StorageUrl to iterate over.
+ Must be of depth one or higher.
+ print_initial_newline: If true, print a newline before recursively
+ expanded prefixes.
+
+ Returns:
+ (num_dirs, num_objects, num_bytes) total number of directories, objects,
+ and bytes iterated.
+ """
+ num_objects = 0
+ num_dirs = 0
+ num_bytes = 0
+ for blr in self._iterator_func(
+ '%s' % url_str, all_versions=self.all_versions).IterAll(
+ expand_top_level_buckets=True,
+ bucket_listing_fields=self.bucket_listing_fields):
+ if self._MatchesExcludedPattern(blr):
+ continue
+
+ if blr.IsObject():
+ nd = 0
+ no, nb = self._print_object_func(blr)
+ elif blr.IsPrefix():
+ if self.should_recurse:
+ if print_initial_newline:
+ self._print_newline_func()
+ else:
+ print_initial_newline = True
+ self._print_dir_header_func(blr)
+ expansion_url_str = StorageUrlFromString(
+ blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
+
+ nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
+ self._print_dir_summary_func(nb, blr)
+ else:
+ nd, no, nb = 1, 0, 0
+ self._print_dir_func(blr)
+ else:
+ # We handle all buckets at the top level, so this should never happen.
+ raise CommandException(
+ 'Sub-level iterator returned a bucketListingRef of type Bucket')
+ num_dirs += nd
+ num_objects += no
+ num_bytes += nb
+
+ return num_dirs, num_objects, num_bytes
+
+ def _MatchesExcludedPattern(self, blr):
+ """Checks bucket listing reference against patterns to exclude.
+
+ Args:
+ blr: BucketListingRef to check.
+
+ Returns:
+ True if reference matches a pattern and should be excluded.
+ """
+ if self.exclude_patterns:
+ tomatch = blr.url_string
+ for pattern in self.exclude_patterns:
+ if fnmatch.fnmatch(tomatch, pattern):
+ return True
+ return False
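
A hedged sketch of how the helper above gets wired up (not part of this change; iterator_func and logger are assumed to come from gsutil's command infrastructure, e.g. a wildcard-iterator factory, and the bucket name and exclude pattern are placeholders):

  helper = LsHelper(iterator_func, logger,
                    should_recurse=True,
                    exclude_patterns=['*.tmp'])
  num_dirs, num_objects, num_bytes = helper.ExpandUrlAndPrint(
      StorageUrlFromString('gs://my-bucket'))
  print '%d dirs, %d objects, %d bytes' % (num_dirs, num_objects, num_bytes)

The byte total comes from whatever (num_objects, num_bytes) tuples the configured print_object_func returns, which is how du-style output can reuse the same class.
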
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/no_op_credentials.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""No-op implementation of credentials for JSON HTTP requests."""
+
+from __future__ import absolute_import
+
+
+class NoOpCredentials(object):
+
+ def __init__(self):
+ pass
+
+ def authorize(self, http_obj): # pylint: disable=invalid-name
+ return http_obj
+
+ def set_store(self, store): # pylint: disable=invalid-name
+ pass
+
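
As a quick illustration (not part of this change): the class is a stand-in passed where the JSON API client expects a credentials object but no authentication should occur, so authorize() hands the HTTP object back untouched.

  creds = NoOpCredentials()
  http = object()
  assert creds.authorize(http) is http  # returned unchanged; nothing is signed
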
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/progress_callback.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,153 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Helper functions for progress callbacks."""
+
+import logging
+import sys
+
+from gslib.util import MakeHumanReadable
+from gslib.util import UTF8
+
+# Default upper and lower bounds for progress callback frequency.
+_START_BYTES_PER_CALLBACK = 1024*64
+_MAX_BYTES_PER_CALLBACK = 1024*1024*100
+
+# Max width of URL to display in progress indicator. Wide enough to allow
+# 15 chars for x/y display on an 80 char wide terminal.
+MAX_PROGRESS_INDICATOR_COLUMNS = 65
+
+
+class ProgressCallbackWithBackoff(object):
+ """Makes progress callbacks with exponential backoff to a maximum value.
+
+ This prevents excessive log message output.
+ """
+
+ def __init__(self, total_size, callback_func,
+ start_bytes_per_callback=_START_BYTES_PER_CALLBACK,
+ max_bytes_per_callback=_MAX_BYTES_PER_CALLBACK,
+ calls_per_exponent=10):
+ """Initializes the callback with backoff.
+
+ Args:
+ total_size: Total bytes to process.
+ callback_func: Func of (int: processed_so_far, int: total_bytes)
+ used to make callbacks.
+ start_bytes_per_callback: Lower bound of bytes per callback.
+ max_bytes_per_callback: Upper bound of bytes per callback.
+ calls_per_exponent: Number of calls to make before reducing rate.
+ """
+ self._bytes_per_callback = start_bytes_per_callback
+ self._callback_func = callback_func
+ self._calls_per_exponent = calls_per_exponent
+ self._max_bytes_per_callback = max_bytes_per_callback
+ self._total_size = total_size
+
+ self._bytes_processed_since_callback = 0
+ self._callbacks_made = 0
+ self._total_bytes_processed = 0
+
+ def Progress(self, bytes_processed):
+ """Tracks byte processing progress, making a callback if necessary."""
+ self._bytes_processed_since_callback += bytes_processed
+ # TODO: We check if >= total_size and truncate because JSON uploads count
+ # metadata during their send progress.
+ if (self._bytes_processed_since_callback > self._bytes_per_callback or
+ (self._total_bytes_processed + self._bytes_processed_since_callback >=
+ self._total_size)):
+ self._total_bytes_processed += self._bytes_processed_since_callback
+ self._callback_func(min(self._total_bytes_processed, self._total_size),
+ self._total_size)
+ self._bytes_processed_since_callback = 0
+ self._callbacks_made += 1
+
+ if self._callbacks_made > self._calls_per_exponent:
+ self._bytes_per_callback = min(self._bytes_per_callback * 2,
+ self._max_bytes_per_callback)
+ self._callbacks_made = 0
+
+
+def ConstructAnnounceText(operation_name, url_string):
+ """Constructs announce text for ongoing operations on url_to_display.
+
+ This truncates the text to a maximum of MAX_PROGRESS_INDICATOR_COLUMNS.
+ Thus, concurrent output (gsutil -m) leaves progress counters in a readable
+ (fixed) position.
+
+ Args:
+ operation_name: String describing the operation, i.e.
+ 'Uploading' or 'Hashing'.
+ url_string: String describing the file/object being processed.
+
+ Returns:
+ Formatted announce text for outputting operation progress.
+ """
+ # Operation name occupies 11 characters (enough for 'Downloading'), plus a
+ # space. The rest is used for url_to_display. If a longer operation name is
+ # used, it will be truncated. We can revisit this size if we need to support
+ # a longer operation, but want to make sure the terminal output is meaningful.
+ justified_op_string = operation_name[:11].ljust(12)
+ start_len = len(justified_op_string)
+ end_len = len(': ')
+ if (start_len + len(url_string) + end_len >
+ MAX_PROGRESS_INDICATOR_COLUMNS):
+ ellipsis_len = len('...')
+ url_string = '...%s' % url_string[
+ -(MAX_PROGRESS_INDICATOR_COLUMNS - start_len - end_len - ellipsis_len):]
+ base_announce_text = '%s%s:' % (justified_op_string, url_string)
+ format_str = '{0:%ds}' % MAX_PROGRESS_INDICATOR_COLUMNS
+ return format_str.format(base_announce_text.encode(UTF8))
+
+
+class FileProgressCallbackHandler(object):
+ """Outputs progress info for large operations like file copy or hash."""
+
+ def __init__(self, announce_text, logger):
+ """Initializes the callback handler.
+
+ Args:
+ announce_text: String describing the operation.
+ logger: For outputting log messages.
+ """
+ self._announce_text = announce_text
+ self._logger = logger
+
+ # Function signature is in boto callback format, which cannot be changed.
+ def call(self, # pylint: disable=invalid-name
+ total_bytes_processed,
+ total_size):
+ """Prints an overwriting line to stderr describing the operation progress.
+
+ Args:
+ total_bytes_processed: Number of bytes processed so far.
+ total_size: Total size of the ongoing operation.
+ """
+ if not self._logger.isEnabledFor(logging.INFO):
+ return
+
+ # Handle streaming case specially where we don't know the total size:
+ if total_size:
+ total_size_string = '/%s' % MakeHumanReadable(total_size)
+ else:
+ total_size_string = ''
+ # Use sys.stderr.write instead of self.logger.info so progress messages
+ # output on a single continuously overwriting line.
+ # TODO: Make this work with logging.Logger.
+ sys.stderr.write('%s%s%s \r' % (
+ self._announce_text,
+ MakeHumanReadable(total_bytes_processed),
+ total_size_string))
+ if total_size and total_bytes_processed == total_size:
+ sys.stderr.write('\n')
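
A small sketch of the backoff behavior above (not part of this change; the 10 MiB total and 64 KiB chunk size are arbitrary, and in gsutil itself the callback role is typically filled by FileProgressCallbackHandler.call):

  import sys

  def _ShowProgress(processed_bytes, total_bytes):
    sys.stderr.write('%d / %d bytes\r' % (processed_bytes, total_bytes))

  progress = ProgressCallbackWithBackoff(10 * 1024 * 1024, _ShowProgress)
  for _ in xrange(160):
    # Early calls report roughly every 64 KiB; reporting thins out as
    # calls accrue, up to _MAX_BYTES_PER_CALLBACK between callbacks.
    progress.Progress(64 * 1024)
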
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/storage_url.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,303 @@
+# -*- coding: utf-8 -*-
+# Copyright 2013 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""File and Cloud URL representation classes."""
+
+from __future__ import absolute_import
+
+import os
+import re
+
+from gslib.exception import InvalidUrlError
+
+# Matches provider strings of the form 'gs://'
+PROVIDER_REGEX = re.compile(r'(?P<provider>[^:]*)://$')
+# Matches bucket strings of the form 'gs://bucket'
+BUCKET_REGEX = re.compile(r'(?P<provider>[^:]*)://(?P<bucket>[^/]*)/{0,1}$')
+# Matches object strings of the form 'gs://bucket/obj'
+OBJECT_REGEX = re.compile(
+ r'(?P<provider>[^:]*)://(?P<bucket>[^/]*)/(?P<object>.*)')
+# Matches versioned object strings of the form 'gs://bucket/obj#1234'
+GS_GENERATION_REGEX = re.compile(r'(?P<object>.+)#(?P<generation>[0-9]+)$')
+# Matches versioned object strings of the form 's3://bucket/obj#NULL'
+S3_VERSION_REGEX = re.compile(r'(?P<object>.+)#(?P<version_id>.+)$')
+# Matches file strings of the form 'file://dir/filename'
+FILE_OBJECT_REGEX = re.compile(r'([^:]*://)(?P<filepath>.*)')
+# Regex to disallow buckets violating charset or not [3..255] chars total.
+BUCKET_NAME_RE = re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9\._-]{1,253}[a-zA-Z0-9]$')
+# Regex to disallow buckets with individual DNS labels longer than 63.
+TOO_LONG_DNS_NAME_COMP = re.compile(r'[-_a-z0-9]{64}')
+# Regex to determine if a string contains any wildcards.
+WILDCARD_REGEX = re.compile(r'[*?\[\]]')
+
+
+class StorageUrl(object):
+ """Abstract base class for file and Cloud Storage URLs."""
+
+ def Clone(self):
+ raise NotImplementedError('Clone not overridden')
+
+ def IsFileUrl(self):
+ raise NotImplementedError('IsFileUrl not overridden')
+
+ def IsCloudUrl(self):
+ raise NotImplementedError('IsCloudUrl not overridden')
+
+ def IsStream(self):
+ raise NotImplementedError('IsStream not overridden')
+
+ def CreatePrefixUrl(self, wildcard_suffix=None):
+ """Returns a prefix of this URL that can be used for iterating.
+
+ Args:
+ wildcard_suffix: If supplied, this wildcard suffix will be appended to the
+ prefix with a trailing slash before being returned.
+
+ Returns:
+ A prefix of this URL that can be used for iterating.
+
+ If this URL contains a trailing slash, it will be stripped to create the
+ prefix. This helps avoid infinite looping when prefixes are iterated, but
+ preserves other slashes so that objects with '/' in the name are handled
+ properly.
+
+ For example, when recursively listing a bucket with the following contents:
+ gs://bucket// <-- object named slash
+ gs://bucket//one-dir-deep
+ a top-level expansion with '/' as a delimiter will result in the following
+ URL strings:
+ 'gs://bucket//' : OBJECT
+ 'gs://bucket//' : PREFIX
+ If we right-strip all slashes from the prefix entry and add a wildcard
+ suffix, we will get 'gs://bucket/*' which will produce identical results
+ (and infinitely recurse).
+
+ Example return values:
+ ('gs://bucket/subdir/', '*') becomes 'gs://bucket/subdir/*'
+ ('gs://bucket/', '*') becomes 'gs://bucket/*'
+ ('gs://bucket/', None) becomes 'gs://bucket'
+ ('gs://bucket/subdir//', '*') becomes 'gs://bucket/subdir//*'
+ ('gs://bucket/subdir///', '**') becomes 'gs://bucket/subdir///**'
+ ('gs://bucket/subdir/', '*') where 'subdir/' is an object becomes
+ 'gs://bucket/subdir/*', but iterating on this will return 'subdir/'
+ as a BucketListingObject, so we will not recurse on it as a subdir
+ during listing.
+ """
+ raise NotImplementedError('CreatePrefixUrl not overridden')
+
+ @property
+ def url_string(self):
+ raise NotImplementedError('url_string not overridden')
+
+ @property
+ def versionless_url_string(self):
+ raise NotImplementedError('versionless_url_string not overridden')
+
+ def __eq__(self, other):
+ return self.url_string == other.url_string
+
+ def __hash__(self):
+ return hash(self.url_string)
+
+
+class _FileUrl(StorageUrl):
+ """File URL class providing parsing and convenience methods.
+
+ This class assists with usage and manipulation of an
+ (optionally wildcarded) file URL string. Depending on the string
+ contents, this class represents one or more directories or files.
+
+ For File URLs, scheme is always file, bucket_name is always blank,
+ and object_name contains the file/directory path.
+ """
+
+ def __init__(self, url_string, is_stream=False):
+ self.scheme = 'file'
+ self.bucket_name = ''
+ match = FILE_OBJECT_REGEX.match(url_string)
+ if match and match.lastindex == 2:
+ self.object_name = match.group(2)
+ else:
+ self.object_name = url_string
+ self.generation = None
+ self.is_stream = is_stream
+ self.delim = os.sep
+
+ def Clone(self):
+ return _FileUrl(self.url_string)
+
+ def IsFileUrl(self):
+ return True
+
+ def IsCloudUrl(self):
+ return False
+
+ def IsStream(self):
+ return self.is_stream
+
+ def IsDirectory(self):
+ return not self.IsStream() and os.path.isdir(self.object_name)
+
+ def CreatePrefixUrl(self, wildcard_suffix=None):
+ return self.url_string
+
+ @property
+ def url_string(self):
+ return '%s://%s' % (self.scheme, self.object_name)
+
+ @property
+ def versionless_url_string(self):
+ return self.url_string
+
+ def __str__(self):
+ return self.url_string
+
+
+class _CloudUrl(StorageUrl):
+ """Cloud URL class providing parsing and convenience methods.
+
+ This class assists with usage and manipulation of an
+ (optionally wildcarded) cloud URL string. Depending on the string
+ contents, this class represents a provider, bucket(s), or object(s).
+
+ This class operates only on strings. No cloud storage API calls are
+ made from this class.
+ """
+
+ def __init__(self, url_string):
+ self.scheme = None
+ self.bucket_name = None
+ self.object_name = None
+ self.generation = None
+ self.delim = '/'
+ provider_match = PROVIDER_REGEX.match(url_string)
+ bucket_match = BUCKET_REGEX.match(url_string)
+ if provider_match:
+ self.scheme = provider_match.group('provider')
+ elif bucket_match:
+ self.scheme = bucket_match.group('provider')
+ self.bucket_name = bucket_match.group('bucket')
+ if (not ContainsWildcard(self.bucket_name) and
+ (not BUCKET_NAME_RE.match(self.bucket_name) or
+ TOO_LONG_DNS_NAME_COMP.search(self.bucket_name))):
+ raise InvalidUrlError('Invalid bucket name in URL "%s"' % url_string)
+ else:
+ object_match = OBJECT_REGEX.match(url_string)
+ if object_match:
+ self.scheme = object_match.group('provider')
+ self.bucket_name = object_match.group('bucket')
+ self.object_name = object_match.group('object')
+ if self.scheme == 'gs':
+ generation_match = GS_GENERATION_REGEX.match(self.object_name)
+ if generation_match:
+ self.object_name = generation_match.group('object')
+ self.generation = generation_match.group('generation')
+ elif self.scheme == 's3':
+ version_match = S3_VERSION_REGEX.match(self.object_name)
+ if version_match:
+ self.object_name = version_match.group('object')
+ self.generation = version_match.group('version_id')
+ else:
+ raise InvalidUrlError(
+ 'CloudUrl: URL string %s did not match URL regex' % url_string)
+
+ def Clone(self):
+ return _CloudUrl(self.url_string)
+
+ def IsFileUrl(self):
+ return False
+
+ def IsCloudUrl(self):
+ return True
+
+ def IsStream(self):
+ raise NotImplementedError('IsStream not supported on CloudUrl')
+
+ def IsBucket(self):
+ return bool(self.bucket_name and not self.object_name)
+
+ def IsObject(self):
+ return bool(self.bucket_name and self.object_name)
+
+ def HasGeneration(self):
+ return bool(self.generation)
+
+ def IsProvider(self):
+ return bool(self.scheme and not self.bucket_name)
+
+ def CreatePrefixUrl(self, wildcard_suffix=None):
+ prefix = StripOneSlash(self.versionless_url_string)
+ if wildcard_suffix:
+ prefix = '%s/%s' % (prefix, wildcard_suffix)
+ return prefix
+
+ @property
+ def bucket_url_string(self):
+ return '%s://%s/' % (self.scheme, self.bucket_name)
+
+ @property
+ def url_string(self):
+ url_str = self.versionless_url_string
+ if self.HasGeneration():
+ url_str += '#%s' % self.generation
+ return url_str
+
+ @property
+ def versionless_url_string(self):
+ if self.IsProvider():
+ return '%s://' % self.scheme
+ elif self.IsBucket():
+ return self.bucket_url_string
+ return '%s://%s/%s' % (self.scheme, self.bucket_name, self.object_name)
+
+ def __str__(self):
+ return self.url_string
+
+
+def StorageUrlFromString(url_str):
+ """Static factory function for creating a StorageUrl from a string."""
+
+ end_scheme_idx = url_str.find('://')
+ if end_scheme_idx == -1:
+ # File is the default scheme.
+ scheme = 'file'
+ path = url_str
+ else:
+ scheme = url_str[0:end_scheme_idx].lower()
+ path = url_str[end_scheme_idx + 3:]
+
+ if scheme not in ('file', 's3', 'gs'):
+ raise InvalidUrlError('Unrecognized scheme "%s"' % scheme)
+ if scheme == 'file':
+ is_stream = (path == '-')
+ return _FileUrl(url_str, is_stream=is_stream)
+ return _CloudUrl(url_str)
+
+
+def StripOneSlash(url_str):
+ if url_str and url_str.endswith('/'):
+ return url_str[:-1]
+ return url_str
+
+
+def ContainsWildcard(url_string):
+ """Checks whether url_string contains a wildcard.
+
+ Args:
+ url_string: URL string to check.
+
+ Returns:
+ bool indicator.
+ """
+ return bool(WILDCARD_REGEX.search(url_string))
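
For illustration, how the factory above parses a few strings (not part of this change; the bucket, object, and generation values are placeholders, and schemes other than file/gs/s3 raise InvalidUrlError):

  url = StorageUrlFromString('gs://my-bucket/path/to/obj#1361234567890123')
  print url.scheme, url.bucket_name, url.object_name, url.generation
  # gs my-bucket path/to/obj 1361234567890123
  print url.IsObject(), url.HasGeneration(), url.versionless_url_string
  # True True gs://my-bucket/path/to/obj

  local = StorageUrlFromString('some/local/path')
  print local.IsFileUrl(), local.url_string
  # True file://some/local/path
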
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/mock_cloud_api.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,160 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Implements a simple mock gsutil Cloud API for unit testing."""
+
+from gslib.cloud_api import ServiceException
+from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
+from gslib.translation_helper import CreateBucketNotFoundException
+from gslib.translation_helper import CreateObjectNotFoundException
+
+
+class MockObject(object):
+ """Defines a mock cloud storage provider object."""
+
+ def __init__(self, root_object, contents=''):
+ self.root_object = root_object
+ self.contents = contents
+
+ def __str__(self):
+ return '%s/%s#%s' % (self.root_object.bucket,
+ self.root_object.name,
+ self.root_object.generation)
+
+ def __repr__(self):
+ return str(self)
+
+
+class MockBucket(object):
+ """Defines a mock cloud storage provider bucket."""
+
+ def __init__(self, bucket_name, versioned=False):
+ self.root_object = apitools_messages.Bucket(
+ name=bucket_name,
+ versioning=apitools_messages.Bucket.VersioningValue(enabled=versioned))
+ # Dict of object_name: (dict of 'live': MockObject
+ # 'versioned': ordered list of MockObject).
+ self.objects = {}
+
+ def CreateObject(self, object_name, contents=''):
+ return self.CreateObjectWithMetadata(MockObject(
+ apitools_messages.Object(name=object_name, contents=contents)))
+
+ def CreateObjectWithMetadata(self, apitools_object, contents=''):
+ """Creates an object in the bucket according to the input metadata.
+
+ This will create a new object version (ignoring the generation specified
+ in the input object).
+
+ Args:
+ apitools_object: apitools Object.
+ contents: optional object contents.
+
+ Returns:
+ apitools Object representing created object.
+ """
+ # This modifies the apitools_object with a generation number.
+ object_name = apitools_object.name
+ if (self.root_object.versioning and self.root_object.versioning.enabled and
+ apitools_object.name in self.objects):
+ if 'live' in self.objects[object_name]:
+ # Versioning enabled and object exists, create an object with a
+ # generation 1 higher.
+ apitools_object.generation = (
+ self.objects[object_name]['live'].root_object.generation + 1)
+ # Move the live object to versioned.
+ if 'versioned' not in self.objects[object_name]:
+ self.objects[object_name]['versioned'] = []
+ self.objects[object_name]['versioned'].append(
+ self.objects[object_name]['live'])
+ elif ('versioned' in self.objects[object_name] and
+ self.objects[object_name]['versioned']):
+ # Versioning enabled but only archived objects exist, pick a generation
+ # higher than the highest versioned object (which will be at the end).
+ apitools_object.generation = (
+ self.objects[object_name]['versioned'][-1].root_object.generation
+ + 1)
+ else:
+ # Versioning disabled or no objects exist yet with this name.
+ apitools_object.generation = 1
+ self.objects[object_name] = {}
+ new_object = MockObject(apitools_object, contents=contents)
+ self.objects[object_name]['live'] = new_object
+ return new_object
+
+
+class MockCloudApi(object):
+ """Simple mock service for buckets/objects that implements Cloud API.
+
+ Also includes some setup functions for tests.
+ """
+
+ def __init__(self, provider='gs'):
+ self.buckets = {}
+ self.provider = provider
+
+ def MockCreateBucket(self, bucket_name):
+ """Creates a simple bucket without exercising the API directly."""
+ if bucket_name in self.buckets:
+ raise ServiceException('Bucket %s already exists.' % bucket_name,
+ status=409)
+ self.buckets[bucket_name] = MockBucket(bucket_name)
+
+ def MockCreateVersionedBucket(self, bucket_name):
+ """Creates a simple bucket without exercising the API directly."""
+ if bucket_name in self.buckets:
+ raise ServiceException('Bucket %s already exists.' % bucket_name,
+ status=409)
+ self.buckets[bucket_name] = MockBucket(bucket_name, versioned=True)
+
+ def MockCreateObject(self, bucket_name, object_name, contents=''):
+ """Creates an object without exercising the API directly."""
+ if bucket_name not in self.buckets:
+ self.MockCreateBucket(bucket_name)
+ self.buckets[bucket_name].CreateObject(object_name, contents=contents)
+
+ def MockCreateObjectWithMetadata(self, apitools_object, contents=''):
+ """Creates an object without exercising the API directly."""
+ assert apitools_object.bucket, 'No bucket specified for mock object'
+ assert apitools_object.name, 'No object name specified for mock object'
+ if apitools_object.bucket not in self.buckets:
+ self.MockCreateBucket(apitools_object.bucket)
+ return self.buckets[apitools_object.bucket].CreateObjectWithMetadata(
+ apitools_object, contents=contents).root_object
+
+ # pylint: disable=unused-argument
+ def GetObjectMetadata(self, bucket_name, object_name, generation=None,
+ provider=None, fields=None):
+ """See CloudApi class for function doc strings."""
+ if generation:
+ generation = long(generation)
+ if bucket_name in self.buckets:
+ bucket = self.buckets[bucket_name]
+ if object_name in bucket.objects and bucket.objects[object_name]:
+ if generation:
+ if 'versioned' in bucket.objects[object_name]:
+ for obj in bucket.objects[object_name]['versioned']:
+ if obj.root_object.generation == generation:
+ return obj.root_object
+ if 'live' in bucket.objects[object_name]:
+ if (bucket.objects[object_name]['live'].root_object.generation ==
+ generation):
+ return bucket.objects[object_name]['live'].root_object
+ else:
+ # Return live object.
+ if 'live' in bucket.objects[object_name]:
+ return bucket.objects[object_name]['live'].root_object
+ raise CreateObjectNotFoundException(404, self.provider, bucket_name,
+ object_name)
+ raise CreateBucketNotFoundException(404, self.provider, bucket_name)
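
A brief sketch of how a unit test might drive the mock above (not part of this change; the bucket and object names are placeholders, and the apitools Object fields used are the ones the mock's own assertions require):

  mock_api = MockCloudApi()
  created = mock_api.MockCreateObjectWithMetadata(
      apitools_messages.Object(bucket='my-bucket', name='obj'), contents='data')
  print created.generation
  # 1 (first version of the object)
  print mock_api.GetObjectMetadata('my-bucket', 'obj').name
  # obj
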
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/mock_logging_handler.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Mock logging handler to check for expected logs."""
+
+import logging
+
+
+class MockLoggingHandler(logging.Handler):
+ """Mock logging handler to check for expected logs."""
+
+ def __init__(self, *args, **kwargs):
+ self.reset()
+ logging.Handler.__init__(self, *args, **kwargs)
+
+ def emit(self, record):
+ self.messages[record.levelname.lower()].append(record.getMessage())
+
+ def reset(self):
+ self.messages = {
+ 'debug': [],
+ 'info': [],
+ 'warning': [],
+ 'error': [],
+ 'critical': [],
+ }
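
For illustration, a minimal way the handler above can be attached in a test (not part of this change; the logger name and message text are placeholders):

  import logging

  logger = logging.getLogger('gslib.example')
  handler = MockLoggingHandler()
  logger.addHandler(handler)
  logger.warning('crcmod extension not installed')
  assert handler.messages['warning'] == ['crcmod extension not installed']
  handler.reset()  # clears every captured level
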
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/test_bucketconfig.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Integration tests for multiple bucket configuration commands."""
+
+import json
+import gslib.tests.testcase as testcase
+from gslib.tests.testcase.integration_testcase import SkipForS3
+from gslib.tests.util import ObjectToURI as suri
+
+
+class TestBucketConfig(testcase.GsUtilIntegrationTestCase):
+ """Integration tests for multiple bucket configuration commands."""
+
+ _set_cors_command = ['cors', 'set']
+ _get_cors_command = ['cors', 'get']
+
+ empty_cors = '[]'
+
+ cors_doc = (
+ '[{"origin": ["http://origin1.example.com", '
+ '"http://origin2.example.com"], '
+ '"responseHeader": ["foo", "bar"], "method": ["GET", "PUT", "POST"], '
+ '"maxAgeSeconds": 3600},'
+ '{"origin": ["http://origin3.example.com"], '
+ '"responseHeader": ["foo2", "bar2"], "method": ["GET", "DELETE"]}]\n')
+ cors_json_obj = json.loads(cors_doc)
+
+ _set_lifecycle_command = ['lifecycle', 'set']
+ _get_lifecycle_command = ['lifecycle', 'get']
+
+ empty_lifecycle = '{}'
+
+ lifecycle_doc = (
+ '{"rule": [{"action": {"type": "Delete"}, "condition": {"age": 365}}]}\n')
+ lifecycle_json_obj = json.loads(lifecycle_doc)
+
+ _set_acl_command = ['acl', 'set']
+ _get_acl_command = ['acl', 'get']
+ _set_defacl_command = ['defacl', 'set']
+ _get_defacl_command = ['defacl', 'get']
+
+ @SkipForS3('A number of configs in this test are not supported by S3')
+ def test_set_multi_config(self):
+ """Tests that bucket config patching affects only the desired config."""
+ bucket_uri = self.CreateBucket()
+ lifecycle_path = self.CreateTempFile(contents=self.lifecycle_doc)
+ cors_path = self.CreateTempFile(contents=self.cors_doc)
+
+ self.RunGsUtil(self._set_cors_command + [cors_path, suri(bucket_uri)])
+ cors_out = self.RunGsUtil(self._get_cors_command + [suri(bucket_uri)],
+ return_stdout=True)
+ self.assertEqual(json.loads(cors_out), self.cors_json_obj)
+
+ self.RunGsUtil(self._set_lifecycle_command + [lifecycle_path,
+ suri(bucket_uri)])
+ cors_out = self.RunGsUtil(self._get_cors_command + [suri(bucket_uri)],
+ return_stdout=True)
+ lifecycle_out = self.RunGsUtil(self._get_lifecycle_command +
+ [suri(bucket_uri)], return_stdout=True)
+ self.assertEqual(json.loads(cors_out), self.cors_json_obj)
+ self.assertEqual(json.loads(lifecycle_out), self.lifecycle_json_obj)
+
+ self.RunGsUtil(
+ self._set_acl_command + ['authenticated-read', suri(bucket_uri)])
+
+ cors_out = self.RunGsUtil(self._get_cors_command + [suri(bucket_uri)],
+ return_stdout=True)
+ lifecycle_out = self.RunGsUtil(self._get_lifecycle_command +
+ [suri(bucket_uri)], return_stdout=True)
+ acl_out = self.RunGsUtil(self._get_acl_command + [suri(bucket_uri)],
+ return_stdout=True)
+ self.assertEqual(json.loads(cors_out), self.cors_json_obj)
+ self.assertEqual(json.loads(lifecycle_out), self.lifecycle_json_obj)
+ self.assertIn('allAuthenticatedUsers', acl_out)
+
+ self.RunGsUtil(
+ self._set_defacl_command + ['public-read', suri(bucket_uri)])
+
+ cors_out = self.RunGsUtil(self._get_cors_command + [suri(bucket_uri)],
+ return_stdout=True)
+ lifecycle_out = self.RunGsUtil(self._get_lifecycle_command +
+ [suri(bucket_uri)], return_stdout=True)
+ acl_out = self.RunGsUtil(self._get_acl_command + [suri(bucket_uri)],
+ return_stdout=True)
+ def_acl_out = self.RunGsUtil(self._get_defacl_command + [suri(bucket_uri)],
+ return_stdout=True)
+ self.assertEqual(json.loads(cors_out), self.cors_json_obj)
+ self.assertEqual(json.loads(lifecycle_out), self.lifecycle_json_obj)
+ self.assertIn('allAuthenticatedUsers', acl_out)
+ self.assertIn('allUsers', def_acl_out)
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/test_copy_helper_funcs.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,335 @@
+# -*- coding: utf-8 -*-
+# Copyright 2013 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Unit tests for parallel upload functions in copy_helper."""
+
+from gslib.copy_helper import _AppendComponentTrackerToParallelUploadTrackerFile
+from gslib.copy_helper import _CreateParallelUploadTrackerFile
+from gslib.copy_helper import _GetPartitionInfo
+from gslib.copy_helper import _HashFilename
+from gslib.copy_helper import _ParseParallelUploadTrackerFile
+from gslib.copy_helper import FilterExistingComponents
+from gslib.copy_helper import ObjectFromTracker
+from gslib.copy_helper import PerformParallelUploadFileToObjectArgs
+from gslib.hashing_helper import CalculateB64EncodedMd5FromContents
+from gslib.storage_url import StorageUrlFromString
+from gslib.tests.mock_cloud_api import MockCloudApi
+from gslib.tests.testcase.unit_testcase import GsUtilUnitTestCase
+from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
+from gslib.util import CreateLock
+
+
+class TestCpFuncs(GsUtilUnitTestCase):
+ """Unit tests for parallel upload functions in cp command."""
+
+ def test_HashFilename(self):
+ # Tests that _HashFilename function works for both string and unicode
+ # filenames (without raising any Unicode encode/decode errors).
+ _HashFilename('file1')
+ _HashFilename(u'file1')
+
+ def test_GetPartitionInfo(self):
+ """Tests the _GetPartitionInfo function."""
+ # Simplest case - threshold divides file_size.
+ (num_components, component_size) = _GetPartitionInfo(300, 200, 10)
+ self.assertEqual(30, num_components)
+ self.assertEqual(10, component_size)
+
+ # Threshold = 1 (mod file_size).
+ (num_components, component_size) = _GetPartitionInfo(301, 200, 10)
+ self.assertEqual(31, num_components)
+ self.assertEqual(10, component_size)
+
+ # Threshold = -1 (mod file_size).
+ (num_components, component_size) = _GetPartitionInfo(299, 200, 10)
+ self.assertEqual(30, num_components)
+ self.assertEqual(10, component_size)
+
+ # Too many components needed.
+ (num_components, component_size) = _GetPartitionInfo(301, 2, 10)
+ self.assertEqual(2, num_components)
+ self.assertEqual(151, component_size)
+
+ # Test num_components with huge numbers.
+ (num_components, component_size) = _GetPartitionInfo((10 ** 150) + 1,
+ 10 ** 200,
+ 10)
+ self.assertEqual((10 ** 149) + 1, num_components)
+ self.assertEqual(10, component_size)
+
+ # Test component_size with huge numbers.
+ (num_components, component_size) = _GetPartitionInfo((10 ** 150) + 1,
+ 10,
+ 10)
+ self.assertEqual(10, num_components)
+ self.assertEqual((10 ** 149) + 1, component_size)
+
+ # Test component_size > file_size (make sure we get at least two components).
+ (num_components, component_size) = _GetPartitionInfo(100, 500, 51)
+ self.assertEquals(2, num_components)
+ self.assertEqual(50, component_size)
+
+ def test_ParseParallelUploadTrackerFile(self):
+ """Tests the _ParseParallelUploadTrackerFile function."""
+ tracker_file_lock = CreateLock()
+ random_prefix = '123'
+ objects = ['obj1', '42', 'obj2', '314159']
+ contents = '\n'.join([random_prefix] + objects)
+ fpath = self.CreateTempFile(file_name='foo',
+ contents=contents)
+ expected_objects = [ObjectFromTracker(objects[2 * i], objects[2 * i + 1])
+ for i in range(0, len(objects) / 2)]
+ (actual_prefix, actual_objects) = _ParseParallelUploadTrackerFile(
+ fpath, tracker_file_lock)
+ self.assertEqual(random_prefix, actual_prefix)
+ self.assertEqual(expected_objects, actual_objects)
+
+ def test_CreateParallelUploadTrackerFile(self):
+ """Tests the _CreateParallelUploadTrackerFile function."""
+ tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
+ tracker_file_lock = CreateLock()
+ random_prefix = '123'
+ objects = ['obj1', '42', 'obj2', '314159']
+ expected_contents = [random_prefix] + objects
+ objects = [ObjectFromTracker(objects[2 * i], objects[2 * i + 1])
+ for i in range(0, len(objects) / 2)]
+ _CreateParallelUploadTrackerFile(tracker_file, random_prefix, objects,
+ tracker_file_lock)
+ with open(tracker_file, 'rb') as f:
+ lines = f.read().splitlines()
+ self.assertEqual(expected_contents, lines)
+
+ def test_AppendComponentTrackerToParallelUploadTrackerFile(self):
+ """Tests the _CreateParallelUploadTrackerFile function with append."""
+ tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
+ tracker_file_lock = CreateLock()
+ random_prefix = '123'
+ objects = ['obj1', '42', 'obj2', '314159']
+ expected_contents = [random_prefix] + objects
+ objects = [ObjectFromTracker(objects[2 * i], objects[2 * i + 1])
+ for i in range(0, len(objects) / 2)]
+ _CreateParallelUploadTrackerFile(tracker_file, random_prefix, objects,
+ tracker_file_lock)
+
+ new_object = ['obj2', '1234']
+ expected_contents += new_object
+ new_object = ObjectFromTracker(new_object[0], new_object[1])
+ _AppendComponentTrackerToParallelUploadTrackerFile(tracker_file, new_object,
+ tracker_file_lock)
+ with open(tracker_file, 'rb') as f:
+ lines = f.read().splitlines()
+ self.assertEqual(expected_contents, lines)
+
+ def test_FilterExistingComponentsNonVersioned(self):
+ """Tests upload with a variety of component states."""
+ mock_api = MockCloudApi()
+ bucket_name = self.MakeTempName('bucket')
+ tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
+ tracker_file_lock = CreateLock()
+
+ # dst_obj_metadata used for passing content-type.
+ empty_object = apitools_messages.Object()
+
+ # Already uploaded, contents still match, component still used.
+ fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
+ contents='1')
+ fpath_uploaded_correctly_url = StorageUrlFromString(
+ str(fpath_uploaded_correctly))
+ object_uploaded_correctly_url = StorageUrlFromString('%s://%s/%s' % (
+ self.default_provider, bucket_name,
+ fpath_uploaded_correctly))
+ with open(fpath_uploaded_correctly) as f_in:
+ fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents(f_in)
+ mock_api.MockCreateObjectWithMetadata(
+ apitools_messages.Object(bucket=bucket_name,
+ name=fpath_uploaded_correctly,
+ md5Hash=fpath_uploaded_correctly_md5),
+ contents='1')
+
+ args_uploaded_correctly = PerformParallelUploadFileToObjectArgs(
+ fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url,
+ object_uploaded_correctly_url, '', empty_object, tracker_file,
+ tracker_file_lock)
+
+ # Not yet uploaded, but needed.
+ fpath_not_uploaded = self.CreateTempFile(file_name='foo2', contents='2')
+ fpath_not_uploaded_url = StorageUrlFromString(str(fpath_not_uploaded))
+ object_not_uploaded_url = StorageUrlFromString('%s://%s/%s' % (
+ self.default_provider, bucket_name, fpath_not_uploaded))
+ args_not_uploaded = PerformParallelUploadFileToObjectArgs(
+ fpath_not_uploaded, 0, 1, fpath_not_uploaded_url,
+ object_not_uploaded_url, '', empty_object, tracker_file,
+ tracker_file_lock)
+
+ # Already uploaded, but contents no longer match. Even though the contents
+ # differ, we don't delete this since the bucket is not versioned and it
+ # will be overwritten anyway.
+ fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4')
+ fpath_wrong_contents_url = StorageUrlFromString(str(fpath_wrong_contents))
+ object_wrong_contents_url = StorageUrlFromString('%s://%s/%s' % (
+ self.default_provider, bucket_name, fpath_wrong_contents))
+ with open(self.CreateTempFile(contents='_')) as f_in:
+ fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in)
+ mock_api.MockCreateObjectWithMetadata(
+ apitools_messages.Object(bucket=bucket_name,
+ name=fpath_wrong_contents,
+ md5Hash=fpath_wrong_contents_md5),
+ contents='1')
+
+ args_wrong_contents = PerformParallelUploadFileToObjectArgs(
+ fpath_wrong_contents, 0, 1, fpath_wrong_contents_url,
+ object_wrong_contents_url, '', empty_object, tracker_file,
+ tracker_file_lock)
+
+ # Exists in tracker file, but component object no longer exists.
+ fpath_remote_deleted = self.CreateTempFile(file_name='foo5', contents='5')
+ fpath_remote_deleted_url = StorageUrlFromString(
+ str(fpath_remote_deleted))
+ args_remote_deleted = PerformParallelUploadFileToObjectArgs(
+ fpath_remote_deleted, 0, 1, fpath_remote_deleted_url, '', '',
+ empty_object, tracker_file, tracker_file_lock)
+
+ # Exists in tracker file and already uploaded, but no longer needed.
+ fpath_no_longer_used = self.CreateTempFile(file_name='foo6', contents='6')
+ with open(fpath_no_longer_used) as f_in:
+ file_md5 = CalculateB64EncodedMd5FromContents(f_in)
+ mock_api.MockCreateObjectWithMetadata(
+ apitools_messages.Object(bucket=bucket_name,
+ name='foo6', md5Hash=file_md5), contents='6')
+
+ dst_args = {fpath_uploaded_correctly: args_uploaded_correctly,
+ fpath_not_uploaded: args_not_uploaded,
+ fpath_wrong_contents: args_wrong_contents,
+ fpath_remote_deleted: args_remote_deleted}
+
+ existing_components = [ObjectFromTracker(fpath_uploaded_correctly, ''),
+ ObjectFromTracker(fpath_wrong_contents, ''),
+ ObjectFromTracker(fpath_remote_deleted, ''),
+ ObjectFromTracker(fpath_no_longer_used, '')]
+
+ bucket_url = StorageUrlFromString('%s://%s' % (self.default_provider,
+ bucket_name))
+
+ (components_to_upload, uploaded_components, existing_objects_to_delete) = (
+ FilterExistingComponents(dst_args, existing_components,
+ bucket_url, mock_api))
+
+ for arg in [args_not_uploaded, args_wrong_contents, args_remote_deleted]:
+ self.assertTrue(arg in components_to_upload)
+ self.assertEqual(1, len(uploaded_components))
+ self.assertEqual(args_uploaded_correctly.dst_url.url_string,
+ uploaded_components[0].url_string)
+ self.assertEqual(1, len(existing_objects_to_delete))
+ no_longer_used_url = StorageUrlFromString('%s://%s/%s' % (
+ self.default_provider, bucket_name, fpath_no_longer_used))
+ self.assertEqual(no_longer_used_url.url_string,
+ existing_objects_to_delete[0].url_string)
+
+ def test_FilterExistingComponentsVersioned(self):
+ """Tests upload with versionined parallel components."""
+
+ mock_api = MockCloudApi()
+ bucket_name = self.MakeTempName('bucket')
+ mock_api.MockCreateVersionedBucket(bucket_name)
+
+ # dst_obj_metadata used for passing content-type.
+ empty_object = apitools_messages.Object()
+
+ tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
+ tracker_file_lock = CreateLock()
+
+ # Already uploaded, contents still match, component still used.
+ fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
+ contents='1')
+ fpath_uploaded_correctly_url = StorageUrlFromString(
+ str(fpath_uploaded_correctly))
+ with open(fpath_uploaded_correctly) as f_in:
+ fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents(f_in)
+ object_uploaded_correctly = mock_api.MockCreateObjectWithMetadata(
+ apitools_messages.Object(bucket=bucket_name,
+ name=fpath_uploaded_correctly,
+ md5Hash=fpath_uploaded_correctly_md5),
+ contents='1')
+ object_uploaded_correctly_url = StorageUrlFromString('%s://%s/%s#%s' % (
+ self.default_provider, bucket_name,
+ fpath_uploaded_correctly, object_uploaded_correctly.generation))
+ args_uploaded_correctly = PerformParallelUploadFileToObjectArgs(
+ fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url,
+ object_uploaded_correctly_url, object_uploaded_correctly.generation,
+ empty_object, tracker_file, tracker_file_lock)
+
+ # Duplicate object name in tracker file, but uploaded correctly.
+ fpath_duplicate = fpath_uploaded_correctly
+ fpath_duplicate_url = StorageUrlFromString(str(fpath_duplicate))
+ duplicate_uploaded_correctly = mock_api.MockCreateObjectWithMetadata(
+ apitools_messages.Object(bucket=bucket_name,
+ name=fpath_duplicate,
+ md5Hash=fpath_uploaded_correctly_md5),
+ contents='1')
+ duplicate_uploaded_correctly_url = StorageUrlFromString('%s://%s/%s#%s' % (
+ self.default_provider, bucket_name,
+ fpath_uploaded_correctly, duplicate_uploaded_correctly.generation))
+ args_duplicate = PerformParallelUploadFileToObjectArgs(
+ fpath_duplicate, 0, 1, fpath_duplicate_url,
+ duplicate_uploaded_correctly_url,
+ duplicate_uploaded_correctly.generation, empty_object, tracker_file,
+ tracker_file_lock)
+
+ # Already uploaded, but contents no longer match.
+ fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4')
+ fpath_wrong_contents_url = StorageUrlFromString(str(fpath_wrong_contents))
+ with open(self.CreateTempFile(contents='_')) as f_in:
+ fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in)
+ object_wrong_contents = mock_api.MockCreateObjectWithMetadata(
+ apitools_messages.Object(bucket=bucket_name,
+ name=fpath_wrong_contents,
+ md5Hash=fpath_wrong_contents_md5),
+ contents='_')
+ wrong_contents_url = StorageUrlFromString('%s://%s/%s#%s' % (
+ self.default_provider, bucket_name,
+ fpath_wrong_contents, object_wrong_contents.generation))
+ args_wrong_contents = PerformParallelUploadFileToObjectArgs(
+ fpath_wrong_contents, 0, 1, fpath_wrong_contents_url,
+ wrong_contents_url, '', empty_object, tracker_file,
+ tracker_file_lock)
+
+ dst_args = {fpath_uploaded_correctly: args_uploaded_correctly,
+ fpath_wrong_contents: args_wrong_contents}
+
+ existing_components = [
+ ObjectFromTracker(fpath_uploaded_correctly,
+ object_uploaded_correctly_url.generation),
+ ObjectFromTracker(fpath_duplicate,
+ duplicate_uploaded_correctly_url.generation),
+ ObjectFromTracker(fpath_wrong_contents,
+ wrong_contents_url.generation)]
+
+ bucket_url = StorageUrlFromString('%s://%s' % (self.default_provider,
+ bucket_name))
+
+ (components_to_upload, uploaded_components, existing_objects_to_delete) = (
+ FilterExistingComponents(dst_args, existing_components,
+ bucket_url, mock_api))
+
+ self.assertEqual([args_wrong_contents], components_to_upload)
+ self.assertEqual(args_uploaded_correctly.dst_url.url_string,
+ uploaded_components[0].url_string)
+ expected_to_delete = [(args_wrong_contents.dst_url.object_name,
+ args_wrong_contents.dst_url.generation),
+ (args_duplicate.dst_url.object_name,
+ args_duplicate.dst_url.generation)]
+ for uri in existing_objects_to_delete:
+ self.assertTrue((uri.object_name, uri.generation) in expected_to_delete)
+ self.assertEqual(len(expected_to_delete), len(existing_objects_to_delete))
+
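Aside on test_GetPartitionInfo above: its expected values are consistent with a
simple ceiling-division rule. The following is a minimal sketch reconstructed from
those expectations (not the checked-in copy_helper implementation), using integer
math so the 10**150-sized cases stay exact:

    def approx_partition_info(file_size, max_components, default_component_size):
        # Ceiling division on plain ints; float division would lose precision
        # on the very large sizes exercised by the test.
        num_components = -(-file_size // default_component_size)
        # Cap at the allowed number of components, but always use at least two.
        num_components = max(2, min(num_components, max_components))
        component_size = -(-file_size // num_components)
        return (num_components, component_size)

    assert approx_partition_info(301, 2, 10) == (2, 151)
    assert approx_partition_info(100, 500, 51) == (2, 50)

Forcing at least two components matches the "component_size > file_size" case the
test calls out.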
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/test_creds_config.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for various combinations of configured credentials."""
+
+from gslib.cred_types import CredTypes
+from gslib.exception import CommandException
+from gslib.gcs_json_api import GcsJsonApi
+from gslib.tests.mock_logging_handler import MockLoggingHandler
+import gslib.tests.testcase as testcase
+from gslib.tests.util import SetBotoConfigForTest
+
+
+class TestCredsConfig(testcase.GsUtilUnitTestCase):
+ """Tests for various combinations of configured credentials."""
+
+ def setUp(self):
+ super(TestCredsConfig, self).setUp()
+ self.log_handler = MockLoggingHandler()
+ self.logger.addHandler(self.log_handler)
+
+ def testMultipleConfiguredCreds(self):
+ with SetBotoConfigForTest([
+ ('Credentials', 'gs_oauth2_refresh_token', 'foo'),
+ ('Credentials', 'gs_service_client_id', 'bar'),
+ ('Credentials', 'gs_service_key_file', 'baz')]):
+
+ try:
+ GcsJsonApi(None, self.logger)
+ self.fail('Succeeded with multiple types of configured creds.')
+ except CommandException, e:
+ msg = str(e)
+ self.assertIn('types of configured credentials', msg)
+ self.assertIn(CredTypes.OAUTH2_USER_ACCOUNT, msg)
+ self.assertIn(CredTypes.OAUTH2_SERVICE_ACCOUNT, msg)
+
+ def testExactlyOneInvalid(self):
+ with SetBotoConfigForTest([
+ ('Credentials', 'gs_oauth2_refresh_token', 'foo'),
+ ('Credentials', 'gs_service_client_id', None),
+ ('Credentials', 'gs_service_key_file', None)]):
+ succeeded = False
+ try:
+ GcsJsonApi(None, self.logger)
+ succeeded = True  # If we self.fail() here, the except below will catch
+ except: # pylint: disable=bare-except
+ warning_messages = self.log_handler.messages['warning']
+ self.assertEquals(1, len(warning_messages))
+ self.assertIn('credentials are invalid', warning_messages[0])
+ self.assertIn(CredTypes.OAUTH2_USER_ACCOUNT, warning_messages[0])
+ if succeeded:
+ self.fail('Succeeded with invalid credentials, one configured.')
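These assertions only need a handler that buckets log records by level. A sketch of
the shape the tests assume (the real class is the new gslib/tests/mock_logging_handler.py
added in this revision; this is an illustration, not that file's contents):

    import logging

    class MockLoggingHandler(logging.Handler):
        """Collects emitted log messages by level so tests can assert on them."""

        def __init__(self, *args, **kwargs):
            logging.Handler.__init__(self, *args, **kwargs)
            self.messages = {'debug': [], 'info': [], 'warning': [],
                             'error': [], 'critical': []}

        def emit(self, record):
            # Store the rendered message text keyed by lowercase level name.
            self.messages[record.levelname.lower()].append(record.getMessage())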
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/test_data/test.p12 Mon Nov 3 12:38:28 2014 UTC
Binary file, no diff available.
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/test_data/test.txt Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,1 @@
+MJoTMtsCkSgUkBDOz8WBdaOKxcD7e0QTCVGun1pw3WmPy0f568YNRhivoiz5VtwXysqKVECi3i8RDtVgC7OiFzSMAf47COa77NgDDSygjTkMMAGy3tnFtXQiqZKZmhYt414YKNg9qFadeXBfHIglkR80ofpdSek6KRsN9LBba9GCxcT1L0OxIoiZ1zlZSgTxJcjw3qiCnESZC0XMs3HI3zFiGjGK4xqvGfWKzWNcr9atFXkKuoLhWQFcJP6xi1iq60arFFuPuimpLY7GtIi9STFKCjOyFCA38ruOgTVIeWZu3rgSE8dLw4stG9piAQtcmroHDw3HMl7g8yBuxtNGodYvn1KBran3zG0HUmrb1wkGMqJ209flQmoaBC16pRB7HECW4Ujdhc6m3n6d1bLb7mEJmHxLYMsk1C00PU8aDEMpI0Y6VcTjKlTewqMTBs5kogh9KKUPKq8MuoNkNcuoa6jk1Kz3tAArNnsoxVFbFQ5MHit9OWpgJ5k16wVjfox154Ja15m4KPtH1JX6TRWNHxvPBholwqwa44ToeUgDl0DQ3zJPK6FHB3Geg9OjbX4V05Me2bzUdzMzzT6AaJDEmzsHW1tlICbHeVh4c9Jyb8GR6Q6pkK2MQmpmUTZgmR3BHRa27V8N8Ddi9zdOJOvVt4EJo7vzgBYff7XsH9Yvns2g5pv3dKZG6cntaEHlPx7PxeRz9aXgxNcUtUBTP4YItbnQThwejSLs4RVDSDN1z89zcyWOjMbQbXTkEt80vstZWIFn7HnFGEVdoFtK1xhCUAlfkfQtiYNBlthhYnxpdCip0UW8y565qxWDvU7FvcMurOk30d5O7twKH2rKg0pHFcwlKH0h0BWIEcvDBElUZT3hVwZskhH69HUAzomz8d2lViMLZz7li75Zcp155NrNc2scPxlI7flvGYia91hfrheOyfNlt98NDaeso32YOWI71hHmXvf8UVY48uFVwvC52osapfdS7EtEWlzDH8iuiejUKtKCijfncsIRboz6vaSPU4t1BZOxfMMXPCFQ3G0FV6QYsZErlJ2nozsYj3PdSry5Oyh2ATl3GzZovN9ukGUAyBpxVcPrOqYRvMtYbKLueBkCpqtKLkUSY6HjBsr3ZgRV8n77MV0U1UIckIcFibtt3wnrwpABtHhlhN7aNl5xkkxRkVWXlPa6f0lX5BTgFJZaz5oPNwJbWzoSTYcbwRPoWQoW23vZJwSByDtl6pIA2GuvZSzfuYlVsauiQOux2yxGMblPouBtztGrJcvYvmDYQwbou6qlK1tny6VDHh9yoLfMuw5xM5EEL6PswMaBGBEpxe5CMMFdyatpfVqwbiD3YU5pI7qsb6yrPuNmz9smdPJMd4kPADpMUJyX8vqsRRcrOVdDCJTjJmdk2JB0JzucYAKpq34zkM5ZRBDYUBkHLph9pi4EC4kMIbdp0sVg6isOySOu8YvYPDmxV5HqtHDU89Kjb6wCzSmRTuJ8Ks2f9FRbBahooaVdO1EZsjkybvt7NLnQzIXXunDv6cXskqSnJXDcTkQUn547lKeXMbD3elgqyXu0z1yxIwiU9RJO4uGWv3Vinj7em1Up0oYO1tJwCFdcvNtnfMSaWPj7RSyvMOsa5ymPOj3YoPWcJx3r0YrEkjYvS2afgp30YE7nwIfAIRIPOKCQpUDVIsm4uV8D8cUFhpw0xKpwHaxMSiRKzcWsLen55ZpIezqAPGdy1FT929qYb5nAJQhGti76rRqLEdEtb1LXbkLbU15E5zLiHc4qOMwyjfZuuytAge7a8jJ8GdgAbGnwyHYPaYWpgFCO3ygLFRYoQFTUQ3i5S1IcmWRnWQkNk3mObvenYdWa8kpUyX8WLg3oA5k30qlwz31xHTNsS8CyewdBuRKVsoqWY8yw28vziEMbkuvVukbeaQHhHfjLlYI5oz7wx0lJd8EIdI32LhifTxhIkn1ISkZcBZwC8CzWJv46kqTJIG6UqvkFyJugQhoNOrvNIYKBri4QHFZoPVHGLuBG5B60PacOsw8toJN70ZURVUop4PgOSqIWbE8Xkr77JB8z1ZMrwI8ukQ6ZIYL1tqaD3MpGkNSMRA2u2GGNS7WWeDZAz7c54XHrilurkZFWHG1NqYxSapeMDVtigPas859UUmoRrhDRUd0I9GleCsVLI6SOjeJEVk3qBvYxfpH7TsUltCRSsayrROZkWs9HPB6heN06Y9lUgcohNNRIarFsihycvFlFsPi7GRNCeqPyrmNVEQCa4xF6M45VApIC8qjSsKo0Qte4j1IT3HjYAXlL7osXcr7t8Ah9CXv2kEMeYuV8YSk4YLaL6LeqfdV5giKPoQIAvhmBpKWm6QxiFvck76YA3m4gogCg7lXOxHxQ4ufejkdUxGbQ16kHyLIFs9M6V1X8unsOsGie5J3IXOOfJQTHv9Ex18GNNw8dQVymi6lzejaLxU8Qgmb2DmL4QoGCThqZF3qU2EK4uKVX8SNIL0zI5hGH1uFc3eaEekCYzG2lJWl9E5KMsplGOXQcTOTd6JsjxRwgdTdZ5cFYAL0Pw9F9OkBVOlmNUeKhTe84J5vbbsynKFk7zNdOSZTAsqX0QlTmfd6UnYeFmR3RekVlS6p0rWBHi1hWDjkJD4bzMk6ko7sowHE3KAypFMx5pdULwh8Fveo7JdGaKYTcPztBvbE6wOwhp61oxoIx042T6oecZGOYElxqQRdrYg6vtRWA5f3GIk4OgPSFNqi3CyokX6cbYsuIi7mYaCr6V36RLicxL0YomGySRxyqxzR5qfMuPMi5Gfq0HEm8QpNDzPQUErFNpzYKnTcY8QVEMh1zqOKXD3lipvJ7rdP82MWiOhey5wHNoeMCMm1SdEFUbNN3mYvf2n7cAJDqjsAIn7RlJeJ4luTIn8E1yXDc6HEjNv7Wh2nJ7N0Gg0UKFSlQA2kokPmYdENUm7GcXiE0a8YcnltTZ7TOsZ0VVgLDjO85MzxPpl1YiNjYUzztKeO4wAfMmC8sic6pbrfAVdkCD3swAe8uVnS5ZJaOd1hXcMGxAHHVhN3nQl3AgZbLOwnb7ckhhBkCg31ZGpcLwGLUCV9hEm0TofU1xaqqeCOPADL7LOxZnEWTBbwD17yW4VDBW1lLgOipjLzStuWw8RdnPAuOHWvj8rd5K7UztVQllkTudn6PF1qxyeDXAoRSdyeU1BtHf9fiCr4rHgR6hfzLw2DzNpCxcBgZjJ5TjVNUTa1IvXnIGuGue9d3E4gZuBEeOlq2gfWzJyxnOSWB8Vit5h5Z1cSwtQmJa9E7GiWVyqmxdERsBgG0lM9kin6GAhQSEJMmi3BVzfmN81VacnOO8ua3LLmMqNi1C3NiuJTXXTxpYvQVJ6NUJP3gc2AagAp7D699xQglKkIoqa4hBRGiZTxtNXvZgg1BKFt72XIxgRrNuUQuRX19shcgpfQ6qvyTYSa96qDTpbvAflFOqOTRd3Puch8RRo0eDeNiTBdqRbpRXCuSSXrJlBpNmrV8KevmHwOd5O6um3PsQjDvBt46W2LB14oDWnjqScES0M1Ul0kVQAwssv
mF4aND8GuexMzvEfen8kL6ke2kWMgcHJxwwnhQghuZr251oSzZMm6UMDj4mhhkoc6NflFL7pGqAp0jq1QSnUF3Iz2Nn7AEDtHOCe0AVQprdrqePyRtUAjlbgtiB62SnfiIC1lTmhPcMFOAGOxhFilEeEz3xlSYkxirx9Sj7z9C3ZxczW9uGvOXlIbwQTIFEMYCiegMPqgeqaICrAfzYsTXoQLq80JUaha680HMpqBRgpN921XT1GXlnlhk4syU561IB9IAOhx1CfuRo4tHH0LkDQ8nV4hJZOS5QeqBsGaiyvUOD5tzr6yindNS5aKgjbrPwkhlYi8RksvGUuIBFsvcoQ3eTzfs5QeeoUn6YUDb9P42siesBspdzyNONN8txLaUs8JUFVoeckFjg1z191p1Teim2YkAx21rnZckC34l86qPFdYHV48DB3xDydlRFMZwyx6xqGaFUzWkQqxl7Jnwe0xqKsW3dIhkqkgZgpYrzFSykVNmMUHPv0bqoFubVKxOdgPx5VAAg1SLfumPadKLBzA42nJipoq8w2qBu7A30Wm28hGV4xas1gO6YWWOvTn9BJjloiUsdsnnMoEEOlDtUSHB2DLbc8kDGiPQ14HOgsTnQ1CqB4tA8zBWKrjldeiiXB7jYF5Q54wAWyVZAos1iMa4Epf60kZBwJZE8hTthUUeeoKh80CPs37XMA1DH2WSIQeimMePuOZTbndXp0WI5sYHz1dWlBXFMf82LQODekVNqCSOLIPmHaIq6nXgccllYcviijpSctCVYi1vLeixF58XgADhTD1i1mTP7rlc3yz42vlEu6CvJnXYfXTf3NYj1hRTnnJbWnh1DxY9PnPZ6tN411nnXnVWtyvBfG1GrXsmGv1lq0iUawshveQPt34RO0CUICtG5BwEqrNkUnYCcRlNxz85hCzBoj9gd3hR30M2B6me3vQXzpBMGU2VwianszcS5AKIaYzuoMVRkSy3DJIY7Ne0qbcZqpNwG40m8ylAfl2e9l6imHbuS7P2skeEIoox7G93VURl2rCjKWyFk2fphU045pRMqntRsVDwdaU2hanO6OJGwkDyJr9ZWJLUOocvzC9osooBtRlo4mSOLc0nwfoDFf2DCDIzZ9gtNX9xxJY5jKVu2O7bdmpnlaRzl95svwEIy5PBd1T0hpaFCU7hJYNueGx8yOhDluUfhmafp4q3QBjSrJG2HlzWcgIf5zvlzM5xa4wk6CF1etXXUVs3cpYyEn0l37Zf4sBAtLH72WD9vW7K6E2kbc1dJjqD1dL3wrRq4mK9oLme5gynB0cx5IOEu1rRwXHsUVQ0WAqJr8KYl4EGpoixUfjfG0b735PhIEk6UStRk049cWL2yMaWp6KnoRjdAYOK9FXoDbC1NKAQMq2xWdwzU38vx0uTolmw6LTzTj1JAIxzHVL0XnuW85TvMEgYboSQiAQsUAO345cgYSn4WWbExJKnpERHax1QytDJOl1dAfjvDpG4lF4pFmdcSihxFdfltVcoYCUlILOzmDTOBnXcphvqKrfXYLKzzBdkuObG1AIiAjSqVi1n9CTifhyVHr1YbZB0TfZwypiH6X01PKUtbN7wXxufqmenIkYm4wSDg1eettz2FQrLCk8t4FT3INLeQgnmH5kbyMd04kTZ3beHDrOtXyGmcmorvNoUKoPl9uJTkIg0Uf3m9lhYxUOFeaVfUPmCzIPPIoEbMIreL7phoVn674o0BXNKI6HV9upFOlRE2XKM5oru2JjxhbX5N0vFr3E7pjhfceNfYmjHc7y64rvrXjY8YcJTrXEssPDx4aEy9gqmDbEyLcx4I1HNX5cLAjNhRAE1Fq7xWEh0bPeoko3GFoUD6VBFWtpxwfwdzb0f9dFvrIPuwjCoCyz3fvCKMONwSIIHKIL9BvOuaRYRyF1B7BE38x6V8rySAg5GhnWVjiEwAxYfVllRbHNLe7lo8dknCjOF1SN0V2kzgiR58t7S12qWwECQe84545Ch9mCWHGwi7PD5XNRFZoZqGniTsInKAo3KZ6mNvVLg0OUMdootzdPq1JW86tX33xlXCDygZvJbR6L7TTGWynGODDmN5I6rg4j5H8iTAtjdOiNdI5cB2r9teszoCfAH84wS3ywQ8pLPY1f2nNA9svxH1RvHlXrjXgzXsexgTJ9tB07AgHOkSMOl4tSDAwv8wQbEkjWVFu6o3hcAtIhaoBvEeoS0Nvs3ISjKq9OPQ3DfYWCFakEUsVx3UT5liDpkan99L5ztviNimUuV0adFqEkbN14jSLdu9SX0MpkxpreE3zzZEyrB2UbMI02JdRizWOSGRZuH8HhJ8OVKcZn1NEkwLqHLXNQhArBLDxPEB09P2F8CkVLuPZ6Mwbn7GlRCJuTacw98fyTgDpcSdM6DMybGGxbvv6tbaC8fVDuM1ODMuhvisZO85Ar5Bs3zNVj1SczhZljLU5yU6JHzKx2NWor4XkV7dfzT8wzZpRWnHsoiGm4Pcay1631LF8v2EQ6or0A681wQhkgADr5SQlccZdRbv5ZMvzXpf6XV1DGqueMzyPQzZcUluEXVOYm5jLxJzTAhpxuhJ7cbvq0O5qLHP1ybNftG4BIWsQEOyxHnPenNHmgt7HsJ5togoN7rMbozVB0bp1IyD8THwMKKmJAjccK5ysoK7WPAjts1Qi2Avme4hAMZckiwc6nZzYmhw8rVCMO02zpOy3ZDaxGMcyQaVkpoxqISZQ2WPyuuIjv9asQeW0lP75PMZqWT3oq5zg1EOWTgzFoRGerJVfqbdXHzhT0EDrt923DJs5JbeyDvgAGLSynT6owWhsFI33jXxinwh7Sm2FD1ObBw8S19TQQvBfRlcdxFxhBL9iTTR3mM9DuOX6OJhldF7pSq0qOqvo6cQtb8pQ1TaAzDC1VetaegPsClQBmvYyUWYmR5vdpFZqzjlDGDB9BG8g0Ngnd6HKQYofTWzFnDAiZdfcPeexfDe7FNSezUiiuAnKD0PrNsGdJ59pqzTgCpUIBZ00lNwtiBhLVwymzhxVOPpX2CM8IWuHxQdleRmr4Zr3KB5Fu8GaaqC137n9klk96flS41n01u2dbbgJCQ9NhCQa3ZWgoyqU5zDvZwzsti15dqs0ToaPffBSFynj7AKCgTiONeUR0JVdGS9OuUGfLIUMpkOKCsGnlGjiIbXfAPWJlvSVQEc9CZbukysVBQ1DwtiG1jfmjI1X2mrOA0vo4GifQ5lBN7SYoGaRuJEue7uZysnOmN1z6L01CxrwW1OgL9VikNhwtlADmYRskqrblLWHl5RIlU6LA5TzHwS6jDcqNd5G5yfKqYFf11UnT0PqtAwBHxSouf8L5mTiiTCDej9Es0mRuEHyDFZyiF2SkC8q1cSGD42jL599vjAYweYGeZyhqxtpVCLnTnCmgR00wvXLfC7Qq9Dae4mRwnkpW8FKxdgU6EcZ4jb5TzQK9Tk7hFv4xdBKOsSFffzk6sb0IltjtUChUAmAWYVMl5eFx1yS2Icyin9zITEZBZzj0g9n2zl20pOx8vUHDGhdLMNTtNKqcrddGlLdBBU
yBICI41Xv4poHq1r62fj0NXy9Tj9D0R7HicheKHRqDqokjSGNmozYmJK4zzFDXI24Nf83BMTiQy3X8yrF6oZEFhXw93frXNhvQW2EtZltdoold2DtyxCPjVRMprCz93n0Wpz6SQoWHzCeDhTPxVT3fKENncvCIeCUn8rIFjfpZXbSYTnUwhyw6RNo5b9y4HJEyX2U5Aj9BFH63bCPnZq12ADHBC8HT9hWpQkzui9qYVEoUGf0tmBIGoxTGoDaJBG2y1iNcci2N4MNrHXgEdrp1fo2qLaMVLOeqjUA6wPoWbuEjzQhvW8lzK7q7AgBcCNu0QqMIwLcZbtJOTTAV5hlYZxHn6gnjmOArBv74xCExePvFb4hS9iRLaVBlP313l2z6slHB0D4P94CkfOWPMEJxOfF1mYQhSAZExUe5Rtis6dp21iibyduAxhaf6ITG8tHHyP7YHN66LmPKoIhy6I5plsjJRIwgHd7Dmp7rWmmiEGN7a2XhKdDueoc2l8U18DYxNMeslDCHkmxlT7fHtrUXMt5E3KrvP4Uc1HYaOwisbc1sAglhWNmzfIz8V59iddsofw0UpIVXOYTzNOPOlIqK2X9qaDKwKW9WDaRDCXiNpxRMd0gIo2VDbQw99Cz8buHdAlzyxTbDED2eOn01XGvAaW1Yx3k5Q8NuuTz6DMCOzdDoPEPM8C4W34KPjCGz7KPHZvxfLX4oSZjbU4eFfzDHt5jnAjhrGV55wq6cTtfRU5XsXEDUzla1mx8JXe3Z9oKz93bz8z96jSDJNjGPhrdvyQI0Byt4IiT4GiXBfqNRQJuJqEdI5crSr9102hZRhlMRCYrpJGTMNqssRm0qnk4RP3orXgHveQLPrwvhKc4U70Mbcir7XNJfenke9PhlUJnTpqNsl2QMBdrlsXp9LfLYXdtx2UgMCVlKrtnbRWOwOvRTf35biA728PQtLTwiZ4GaucDdWOfIfPc62ts3NDERvWioSESMJKDcbJ6NulxaaSQintdb2PQCyUqvAgDiZfP7qo7s31VJzBXC75jvEvwFU8N6hT0AQapnVHpFPkKdelQ4RRdkP96jdw6JXpYoFElPwnaYvXRozR8jwKhxjcyS08POF7DLMuK6eKaCR9qaSeRPfmsXjPJIJ6jOACtGClxwR4zDLKMWBCyOhqOjiq2tYyyKugDnT32kKhriHd3ZVNyNYyu7LfEqeSMjicqJguoDWHzxPiO4i0xgm63CqFkLZq5d2qi6kxxIgfwlpM1QJ0xFlNkNMumDmocBk5gY9p6kgkO8DpaV2E3Y3K4bp2xrDRn0Cm4r3qcm26pjMdVT8va3uzr77jOcfSZ5WG8NSQpjT0hSYxbhQncTb8PbVZ3FcHHzStmssyUHSRg13VRQn90Q4yOiLCTcbVDBcjrdS38vgWCRuEBma56iJNJerUMuRICymEyr0n4KqLbBNuevGMjAhlTT9IhwA3o9RTOmnx7AoHpIbiNFLEwbAwgyDyus4AD90V5gcSHeUL4CXSRbpcb0vVcfrq32cG8qKSnchPfuxuhVlZ6OQ0CB6ofyN9nC6JRbb9U1YpZPeHEwgRlRCgimBomurr32zrAivLLFuSTMQiMFnoARUT0egP8i753kQqi9Er5G2KEdpPPzG5UIAUIXhNgKrjRNQdAaNGZ6YJduiI8h7Rpg7LgpO59TiULrYdEzTfrw7MLbcjwtg1w6ivcVQ3lg1oVugQwqsfB6cpxOYPWmCrcTskHIbc4SXLiT9rSf32nRfKrdI31eJNFYfNCbPDJm08zpLNT3ICm506HT9ohhlMy1ZnAHmZcmlqvbw1y56qhqpCDn0mp2rC4nW6lsa1DzJpVvH3IWVpA39z8GOtlsrlxeJS4BsbcPv46XmIOBxw2ZdKn0VQHDLUPxA4HEhrPUY0mUoEorccPSptjSyJ8pob3QgYf6Ls2xAViugbb9agZnbaMP44hfKitYdtiPC4AGGytoHpsJCY4FGUVIRK3W9eOFSojL4rxblIO3HOz5tp88D9M6xzJA9x2SYsHZgliOr6UZcs58ZJ3472HsXaFziBu3uapPot7drFSVNIOsnwwnG9qaKsJkKleFLCYmtOwjhNJTCYySVasPYpuw85DqnnbC7tjJ8sGLxnaAeKBi8r4fMxiSOFxzQK6pL3FSJPfSBaB4QoiCk4ml2CcfxNNt7dggYNsFyt8RnCq227eRoK6x51dLOVmWmngPbTDFLgTce8G7dJ9aNyquN43OgycadIztUujA37o93N5EY8R3zCsQXKkhG09mkgag321CuH87HI1twSSY7LDlJ8Qrkzo1J9TBzRRBoOFwwZz2utj1kE1YBsHHGgrDyPoKzTxrv7fMqNFT49m7gjc4YfpwSAHOBmgmdKKZwRQHTA0ZF13JmEywYMqWwy2nIcNICgjZ08TKaJl2IL1NvaR4UPkLR9vWQwJfZeZeTy3G0VeIfOFTwBdWsYQ7wDtO3XU9TrzxeaBlsMiz64oxG5v5BCIz9AicVfKC9MxElyimDPXP6t0RXTZXBQsfKLwPpJoMUgS9aJhvcf61VPfFTp7GN3v5Abnw2lyDPcvqcVwInrNCoGBcSp0nEFbrZ4obYcXbp9ifaoVzkUc19pUPlKNUhpEBzkEUmBvC93o7GaKGfVulblQDOGtugh7ho7N6sPfsp1gRyYMYUXNVmb1wn0zdwXMYvYkcnT92uUSSusIo3OjqYjp1hnoCeh954w9KqBOMb6psqkAvPsZff6hHJ0JMZwYPCU03hXLaklGjoMyEbtEp2a571waqZiDIEW1d3wL6UVpQQ1U1B42WYmnwSF6TMo9YE3X7Mj3CNqSIXEsRcLo4bgaHQvyH7AzxKMhuSd6Y2YfO6psN75azIetbUeeGBxIxEsSUDCwftMG5PYAhv8Esr55YvROXA5c2EAXrsH13B2bkgsrAeeP00JUwsybBR0ggkzFWnRKkL0ufPyOfH04UWExFCcmUAbUcDSnor1LCqKik67XeJqlkmLIFY1TphFf8oxMiS9p75BuCx93DJE5g4uVkHaw14TlhWoDPJmVkgcLqQINVKXtZtSahYENEMj8K2QArcLtQW88SOSDgj9URBY8UEx8ZeYCOD25GfhKaa99JPR28GjruTgT2vmlIUgF6p58kg0Gus9tlLxQzxTKdxwmGvMupUzxGv2n8G8M6oWqbty4GefDkM0kyQpVL2kI6HWJYfaijPUNEBeJ2gJYIRpCJwFAV5u1MtZZrD054xrthTsu00TVchw2EzPkPFRLzJfJ8dNTNfQr1KGvMExyqHNhXG77eXXoFGHf0D3CZTxdg7OyEvdhqds9giPAssoSyphxQubM0kv8EidIHyLH8PxMOddTFtIC6Kd6w6EClcAhxafVIOwpfRq6xevLXWmRk1Ao7M2iBqAOAKPklli9ykvOpgRRhwfx3WqIE01qMMfLk6RfOS4uy3GZSpCSfkYfg70Afc0vEkbByLbBDxBL8r9AYcelA3sq8uQzj8iimZPeP2bpx8f1faaZZXXIWjQpAiNVCdRy1FZqhyoBhYEfAKEw3jVbAbP5jJ
NuOHBjOQMzZG2qlFuNfBLsvAayxSIka8juOPUzmayXaXqVwrmvdxvETYDk11FDAtK1yCRuWn6YPryKTTN5CIa8NpcMS21BfUDgmCepgon9NsjFJYg1cKgylqdDq42RwUWWERCeQJib0tCKbv3MhOZI3VB7kQEKv1ssbkzQVjuDYpoeNMTm3amagQ9LsBvxDLllnM30yoEo1kws0Erl2lx2adsyoabP9uqDAQHXbBO4IKqIyTc3Wr9rZ1NSMaE2Vb2hkRLetOiCY4rSj3TQYmvKlfifOQcN9bS1TRpDdbP8d4HfTEXM7U8udu5WOoiZ8bREcSXrA4Zil13LmThazt7bOYJcmgYHEDD87eXCRYyn3M2fXJRbW803xt888dvjJAWU9JvtYQ34BILWwadrpv7eVdRlmK45zQnfKn0T6X6TMa9TJ9DHk9rzLnF3e8n0Hqx5sJCOF5GdsXmfNqT5dUudTR0ioBhokXX3VYGYi9mxQHUhpUtX6KTg9jal3h3TH6ybA7csZOXtJUiZ40YFCrDt1hZnn6yAPwd3TbmY1THvHLthH9OC54MsJzOfw8PIr12G7yAojnK8Rx7Dn9JQ5GHmcngvIqCgGqHBWlf3MMLllDHCsbYPQImdg0WhcS2pxrx3FpYA3GOtxzziY8xCD8zyYIEgphjYqSGSgK7Fi1GSPbCuAhqTCCuCE212hRTZg0Gv1Cl0cBO43JSxKduxg9WeLOsvdu62Jg2usFMdByd3tJNGiO8toiyrANG1mDihQ7wK1UfFD3XE3uBfYLmqUHOpfGpN2RvOXKPZzKJul9stP2h41f6mDq860R2vUz5SnKhWfuk92LwCYc4tu5jcMtMUwpdSg5E4C2VilFf8Z92WC9Gzz0ujgytOA3O8DMuIytb4jo4xoQYmx4Lm6lFP1kDDJumCbOtgDKvstJRonHzCMgsOR094d2XHLoSfG54vTvewiWgdTbvVMTWtiviBE19NUluFfLIyX0eAsR3S55QBjCYOlvHHOvIckAiTgec1JU8vnA2kggqJntGPXQXD7rQnDll3XvffUAYpm1XqmHtDxvhMtx0ES0ok9OsXJkRWJodBvXEwuZyGXJHyTM75a1G51cv5HIGBOPTft3OcNGH9FmTKKoF3y6LMTM4fnCJf55P8W87ToO4wwSRwSErPxEy4t140QcHNicLLHP3vu41YVAaggPEdfc3q8iVLxISYTYDH6X53lYhegtkK5yHWtQh2acmmIqTyQIr8PlszCZ3DVyG0sDPebcdN4uTnAVd4eGEdHWUKIudepMo9lfDRBQg7j3NFWRnDjrd8chZRjJsNreTAP4UpMIbqKkpYM9GukWLMrE5kyiKS5oPxtIvAKpVhY4mXlAB5yo1pGTeVMjKOuNMZWdlgoV6KAQi8tPGLR9eng08hDLQOGZR6jRamgLhxUkEIazbwyzGmvsBJY2V9Uupx3bm2oztUy2NYCmJ71hMQLpQtlatKXwiGmNi1BoFyydzTIAThIH5ROc8Yrg3zXZYO3uKneIBl3OtbxoyUEfFPE59n0kvvR8X07GPWde8x9HeD9GrVmDIgYRtxYXX9yPzuHEn7LDjUAHcJcNp7q1bHmYw89FKZMcnR9tHx3Xdk60nCrvwyuUD4M9qXQf2JvlN7UJgSUHZc0qFc0m3BMLGclZt6CVFyTc0GiHLj0TbqDv83WAjghN0py8DMbReVrwYvSaDIAb3SqN01Owq6U0akB2ogR3ImplYXTyXoBp5CbQt6IIT2UEXqRqW7KZMGNGzsj9sORUC1psRJRQRpFQMnp5SHxNqpqwNfzOnmyicGCRnTPWOD4tawFOmEon1C7x8LjJqOyUzS7q8kvwx85qiphmL8cTsUx542OH61qHerAeAWeYPXK85kEqBDsGbPudCMmEf10CLmpi1OSX7s7vIFSARy244PuFo8lNSxThPcmcvacG6vjLk9iZyTwuI6hOg2nWSYjJtNWpoGuuiQRsdd3JBh8cxPSu6K3wocCsJTq8j7vIHZ5iNaTzxphdHWdD8YU2BMeQdFdmcyIOEaZOyc6L3yVRMfljJqrDr3wJ2ae9ArwUnUk9u3CyEDLwdKPURyyYxc6Se1GKkbwzDTICPTKvlubCk1afoeDAhHdID0oLMU6vEAEGGx1BAHD11ZYnvp8BoLZjJu8IRHYi0nP7rIiwH0KFu3XkIWOf6trvt2T6YuaDK3ccJfh7Ouvdaj4gwjd2skis8VnQ9Tz50zwtyKjg4JlpkTzYXREGnwJKXyMDo43mmbRJDn6EdbCPHG1pJ9AnXjgCmvunwEqw2ZgMXXhDK1Q3i5hspTwbhCREoQgTEGN4gT6ABYKKXGWsV9ePeQ3xQi17uBOkxI5gAnsjQL0S2hzf5VoXP8Je5IeKR9TbNqxLuPFUHMpYEp1SSrV3kxhVDhnFKStAhgpaFiJ7m3EIXuhEKcdSYbgJakQNjX1bqxv6kDuxBnEQe4a53QFMkQPopyVOJq5uyiJfvgkrK5KXq4pPcnlQfdGgD94Gh1elMezyCXRWV8pH4tcdJic5gzhAHWziiT3aDAsUkqTRtMPHrz7fBAOZdvlGpJ6xSjMGrQ8BgP6XPAsOEpCaMMS2Zt8LfPPygF6RumWoXWlaUVuMC3WChqSbc2FPwdDbwgnI0SLNh0AHyg7bxtfnGGQLqG8MRpOSFVKcxmVPOk75woPFlkFW7ChzvBMx2Javu9PxwcZy0kSzLuX1Jpr4sJbfDzuiSeOwTq1yi3phDjTTvgEmEevElcnE6sfnQ6GscanI8oInLxkIV56Hzf8i7wWh4PdsUtNbJfYTkFmh23L6aUpAJOfWaujWdTqAQhZrHGx7kCE4AjF2i9B7RdiWGekifD8rWYXOqVwtR18iJay6jBABsSW8I7TMCfU5BnUOaEL67qugRvX6AuRiX1MeEABiw9eugtXWo1fRBE36dJCOQ7XP3nwEC2nTLB5Ksm9xiWcQyYO9wfTgan77VZc2yllZI52xC83XVOmAOwhBETOP3nxNS0zgsFv5kR43ZhB2egBMB22t3Yc45r3dfmFL82YAVCM7Ylu0N7cxaobqzmzfLnY2XU3zIO2lPSbLNyyYwHwbuy5jRuPAwMTb89QeYcsumx6z2wlOC0aNvMsIHLHaT2lk9Vy5xBoz3LDmXxCK5t4tj8kT4q85eoQ2DjmbWrqqwa0KpjoChr7VgvPJiDJzkK0zJkoLf2TXnbBT2oRZqy0N3q2KvHBFBfPpgxASronERXTkZ991hKTmqTNbWdbzY5FLgQl7WTNym3RpvuP3ogzRAKU95GwoWw5RIlrqdNOLsemTjDbt3dWRedC6zJAaDWMb1oVQKVGDwHCMB7wWvRv0aUUjWHzAXvasM0JseKakq5aoLq39Fklc3KQO2kGtQ9aNUfe45N7QvLFKC5YYt6xAKiiBxLJeN2iQt73GmrVEwPjvSje1iSgUdJrZOzCE8rTXDG65XhJ5xgmyTkbhyyMOiMbrcHvt3ReUsyBBCLtkXkvrcHtMyEDlngyEj8mlQvtBOQq4kQYeEktHdyE1G7ojVfCeWEM30gD
oPgFitmDJy4WT8VuwG7JVA14vo3LzL15MNarsMuT5o1cAKoGKL7WtdxRcKWssPDhF1MD2ONSR2e7bk4Jn1p8ZLvjO4QBw69UwOIZ79oH9LIe5pCV8oF45X7BRAEIM1njhDSImqeIsxUVkyePoNaRbWiEQzVRqq2CX9l4OgBUuTXOGJiTz0dkHh29FnZCi3kaQYAJYIJzOJx4ICtRqDRrkihqTBzezVEEmuQMfeLPMzz0D2BKJu6ExHDJof4OP3MGHq5IRg5yyQa3lDLiOiqiHYa5bF37NAvb09WaxLC8EeHnXGuQMBxyGA77SOrQHMZf523EfeC0ISvHkrHWLcX1L4RawtPwv7QN1RC0W0X5PetidnDuqVyU1bhFK42oggGiqITWNDcFmUhNxifSJ8Fr78sd5FamNdfqFtuDJEZvafr1uJPxcrxzZkXNl4B86LL8wzoCK1cmTMl8EL8SQtgWl6xOVwinAZrYVbSqVB3OcwNeVW6vuAcRwjPApDEwQ8nbrAPJ9Nax5wzNEacVD6o0NXudYQeOLamM9CtxMO3P1s5frln9igsyIOt3sBjGkSI3OWBu3xsU5iRgcKcLHoWa56DRxazzQfKYz8P57IVpBdDPYHv2m9zVuwaoe6sClPcLWOUTn9dkuHAJqQdJmGyAblodP42fzxy1mPeZSPbHqidzBZ9FGbHSjlo1siLOzhSDwyQPaaYTtNszEJgIpSo3u1eySuRtybhZHro5zdAWhqNPgNH8YlvjV4jpXoOtbMTPJ29sIX0JLJxLqsTW6fSwm101yWFPurge2gFiDm7xpOxyu88JY34tlA20GuUk084rqOP5fiSmftEUKrRtkHZ8wXsHr1wyaFW2fdPdRyFJmtGINDir8O6IEQxj3soieKw0BqN6zvGJ0d0MS7xCwIVyscsuCInN4XYQ4oBq9uplrzz1MWg6saLNwhYaCaHD6Bd112Z6qA1pQsio8uyYbGVRjpWPCLzHXEJevoYnqZvQ7sD8koaKWAJ3axproHB1VcoD2wfQjKGdvX1UmeVsK7S3AEPg5rYwckkWoK7d6l48DJq9ACq5Xlygz9GxgjuMW0JTwTSjDnFXJuHjvSUZt9YB7jdrDLpRuwi3j28M4eHAr6qCjz4eK5teXytgglsBB34mpG3q7Ci7LcFK3jMuCeL0rLERiLj01Qt8QfHgAKPqd2JQVbjxGwwkIgbsdDY48wukdyhoERkiPvhpyvtnDkAZjleIOpCho43pHuUFb5vq7splMhWMP5Nf87DbQmfjglmS109jYKTeDq2q9cM7BwlWK3rjdaChjioAgIV8AGADznXLib69hCMPgFJ6KxskqUGd1chbpNjRn0UDGOeIRVdCIx0V8wd0FdU1hDt9ptMSLUOIqrK6OpuFsFyeIw9d9UKGIPR8a9Crgb7KSYVP6YzFjO3d5Eii34fhAF2pxga8QcVwRCRdBgEUYPLpg57hI2UC0D082NAvUNm5IdXDoAPDPW0CAK4l2cJ8DxaYg9681b180CTj9MwVb41M626Wb9MOnlwgZCENUXhk1uA1oLVpRJEKvBof9i7j5f4bACGsPpVASHF1XjyiwuSTqiJO5mUQrFFO7Z0apiKc7WMqaDtiFfRhkaxHTSECTigzNyx8UuDiditrT4e0795DgZrKOE4nGHLiTMvL8LvEA1YefMyiTS5cjO7ocWvyw7Pp2XAVnYPeMwlQ3NKTTVBR2HNnnP3BdIEbCnHPTS5nrxggkDqDYa07sxRtLvxtmCBS2sa3vnhWAo9YBqEm4vtT7fklV64E0owF6GuULVZRN50mNbCgBq3hmVRBht9QTcWj0r87QhUg8aT5ALOPHKVuX3TS7PG2mJFp8H3bNt0Tac7Qx4YNIgupmL1PXQx3cEM89dHaBzccaZcGez6X5biI5yH0sm1mVKBPLY4SKx0hDSfHknDnJM2iLgIVylXz1dfznYMcTVCxUXbQzSXktphYGpfoxIrWEwXh9WBORUJEVRRcGOgvrgOFggQAnizXg2AR9MlKFs0Nfp0sYrXMr7vKWPlfBHRV0PYDxjfeP0ScB2THaAiuCb1P72ofztnqCFPiZITBhJXWQ32l9YWF9gkzvSFbG1Ss7dbmhMVRV9SCl47mxd2OLFEnxatJu4IsUss4EYIMPLuNQRHU0mBWdRvouVvrlNu5wwxIwVgsTWj9mWUYwkuH2Fjol5A6Ggh7MvVHzSAm1tvJvibLnBijMwBGyeSdc5QXfUbzY3B0TX7hko1Ky8rbljIO0ZhHbngwuDSnDRVifsXKym11aiQCtaH3GqUdiNijCJGF313CQoseCOFIxE0b9w4jEfXZ9DgpuRum60aEREyNFzn109wNPSB7adVvzv5tLt79G9lz3zJkTZBOzdpK6VhKczsW9llDzF6w2pbyMGeUjRIg4yNtyIsMtYuRLdKMURUpqfzAzmcROrCsKKbiiTSZkIddtISAovpSDxMCY3DFvpj6mskGiLPE9ps5N82FbN7tdxXIAWoH9A5Q6nyoq6AbavpRuYRrxDPu4wKsau3VfKQVerKspQlGGT7morkQ95iAbHswFBO1lAFwLKWBvQx9oXYanPrJsb1kQIgKMZQLqB4gtFDOyyXvzBGN8qaY9lsTLijwLl9kPt4riCExUCueZ7hbYxUQR8AUapukVOjNAmVcMhYCAiqVEXj6EjYfI1ne4KPMdfWwlWAPWEkpymYEyxWRpoY57EUSxau38nplMVPR0IAEAaMCnuaKZQ3vLRYDUaXUTA9XvUSyI1zqkqXA30HNCik718pUbPjF88qwkTlw8mEgM0GtXXsE1PXFn5y2J4etbnG4PwmRMEOoGyy2isWwiVhe5kqdHfVbhKDmmGo5Mj864d3TXIgQhX57jnJQU4uxGlUUQH8LiiQFQHvaj2qsowwdivX4aG30ft9gJr4um2FXOT7PORcbK6BWr3YghNwaKUQpf4Du9PYxX8VZogvwWVtJ8ucB3Yhfdrusyhb6PeTw3SxgEXuzqZaD4DfpSQvQE8GaH9UaiUVJSjyQq5wa6sgFNiRRYqMLEQI2MOxMXiVVKRRmXVP0pDHineZCi77JvvhWWu2GWgVcHprOzrJ0lz3IeMaYXVejINI7XR87AbXk72nV1FZcQZndSfRcuzofAAfKijE8f83sjUHj7iqgyDHu7lLeQtgE50ZkYf8AnuzVNQwZWgjVR25p1EKP0ehYIZJu0zagdGZ0bEUaCyLK340pnxu31zleaSvxNJPXZ8FsS53HALNHLLNg2qGvh2nSUIvrEIa4F8LOmESEEeckCbQH20G6jp7V4B4N7tOwZ2yaq7Z9TRLuI2GCLGyPJPK0zfZPbwabpWBbnBC8Qe77dh9iCSlWpIKBA2oJVp10QZHO5ffKehuHufYNpQuyRze10DtXXIPuJLhSX99QgS1IRSZKnKXqAzQDi9yJLcPGNu48GSZVZt2kjn7iv2Ta7B6he3i3JO1cFT8U6k3Qo0VvW4p0nEsRYrwEZFPN8nWA8Hcltelsk5fpVOt3PbQ7eBNtu2jPuJshc2LZbanFaw0Ram4qYjB
3LVytA9wlHXCSkWQIfQJlCVlSndVlmychJlSSy0SHLwylXiRU8sSo57xsoNJZ9R2x277uWhC7rU06O0rWQF8lSvGb9euYpbc17vhp92KUnhtgLU6w1YSbMhEmUW4QQpV7Zk1Ds8gryTMCwopUuqr0mVyutTBxy2E0ZkmoksBKBZREMI9RiQmyNQW198zUtdw2Cn9nZCLEeALpl2dmYbI4ogFYdkyUPE0KnyiOQ1ZNknyDuOZqpu7N7teSdkv1KhTBjqQLPXh5QUgLRBuSBHAEiOWcE21rmR8YzJu7GGdZwX8WcvNa5cSsD0Z2Lin2I3IIJaquuc8ZAK98X3eGqV8T0EDMn1YphkJIJDK3B7rEOsaXuMqq5Guxs7eAVY76ZitCRx2C3Ka8HSw897QN0QMqObIlDXvg1DWy6ag73pENwghJVcZWw323mkiaUijz2z4XVCYnC1oBLMXEPGE2jZPeMbXKkD5b7JyB9u0BOm6HrW1ZjWfEmLqxCfp9TaGkHh3XpdZo3Xd6qVcj0G9qLK3IeHgsN9UFVj08Dl5h0KN1vTi0cGRbu4oUUo8eoUL06pihVW46eALH4bKO6oVD0RWVxRUUsakw9i3tMFaghFIBoJ6F8apc6bYsOT3moX7JaEtpyaqGshwH1Z6HTvwJcM5oUeDVZ1ZYqJmNKTijm0mZoOLlyn0JTyuP3bC8zm6bIAd7xJALlbKS0lRfv10aAGWxdvjNNnEKFkZPyJwBfhBepQlmSP1BhntBf9mx46VRerpxrV1GCin7WdP0E15Zf90exSxyDEbL9gMBDEKHmA5VxTwMAdPM20canihyOeuNvvElq5cFnhMsCBkIucHfVz7FLX4zckayOUzJveE3lgTyTCQmWXCMBRqDMKzPJLcjWew319pTxUpBZ9p3ASZOBOy2CKlr5IqLLVZwuJx6ncm4vSoWFcHSb8auFKcTwUFPqvBbz4sWyDym25f3G26LC6MGUksqXzcEAcXYdMm3yXy1NDX3jBATwbGiK3yIWQrfPViZlrmc4yhtzCjuxFmocghjibch34LYnrfKejXZqgmT2O5GKlGFLWmKSxJzNIIubzPhDtfs6YqmWb1Y90AfWg2Qhsf6SvJtnl0xxwlrQZbQN9Ad2sQTKTbf5akYx8dnvEkgEnynvSuLA1sn6WsFN6vJ9uqAUJMj4U63Y3j1JBf8E6Uxh3fx2PbuNIIYqe58uWkR97hLRgQHwPqXMjY08TQaoXlMevOTlflVIXTP3IWQpkqAgkdiH7ciziQ70400Faxkyfrrn7xeK3VmUF2cA8xmAPIO7KT5LPI1DPCWP2jHYzcyQDcupAWhccOcDicbS9xbZYFwwf7mz2xIfl7RZMih5ELlo3HRUd7yODDIrkJLUutn094xRWKtaPhclOC1nQgxN8GKuzK8duIJFHEdb3FZGVuQHp8QS7YpcLTtZycOq6MhCNG9msvO29gzlsTewM6AxhPSgUinJIpXGAWa9ZDbXYz7Bd0lVYJJCsGwceVk7plVpC4KNdSud9wqgjvO0Ys9pAncZfNWT0vWeek1qYjCzeFkXC9nAWDnfQOapcRaJbQtTnqVFFtlwmBCO3Kb4MaO7uKKx3deo7GEHKDFSw2g9GP0oS29xccqEBE8KtLbSgiwuD0LWAZeYce10fXAVbndQF33yH3ydnKFjn85SBu02ff1khdFyJEHnM8oWVRx6Vn2BVWcrIgmoPgipJm7ekLHcIwQO7iLeSu4RUJcnDxwhF575GXESyXkZ20wPRmd2vTomGGgJxIfydLQHXcXLw9jQ3RC8bxgVLhx1akVs0tAmMzPEq5hSkCMoVkvOjOfEceEYzipLIcFXRRbKp8x05Qy3UY93P4uwSzBLnqeb5LZMEhkUDVLadA6EOlkuHixWbxBDheMPGLNR2Whud7sjOjgXU59w8xbRWp9EuaCIvoOR6EmHNvO0w5yVXs3Y2afs0ghOCMzeLmpYlNkc7azwqVMA1XVgnTWgWwGZRI4TmVjvbI0Y0t50LuHpG2mA3vdJcjTraTunTRWdqsbh2evHuA82XPrOpkiRyQeuJuyPsjYDzW2ouNoXehGGcXP7YmcD8Ey6bdMknsyQhcLiDe2KxqIqomsGEpZbKhguBqFDCz7X44LDq7Lg6TZh1vsAD32QhpYtqKe0GTL5VZ8xvXxjwAFaAgNVjFwTNfV9JEPMn7sZLSRJ8HJbaoh7dz6hNXGiCXgZPp1Ajfa6LOcHGX42BqSQyUGlwLlN0LVXt8p4Lr3DarJPDq21nrnTeWwcykW6TfLyV9AwLEkbS6ZXzxGVp8bHp87PW6ox7kaft2o0UlqKrZSPmpIg6NUflc2LRj0uTkClzXEEHSUBXON6p1OskfTWJym9GfmvuKI5Um63Df4ZCEWrCdV0B4b0L7OC5NDHb0qSWp4OkXI4ioPpXw6vaUrVvKOaNoNDSp2SRdrAxCt8h7EeRGZMwTSXDGh6DF2UDsPFJ1Hiwfek4RbP3vBRxUKZgHCRm63TSKhxfqoo1a9wHghGQ16Whu9TZz65IBEEcQGdl4HLj8x493uFp5PbSwusu5VNRdU73U0jJfyYD1gDB9vZ06wy5UMMajhClOncDrJdAAErKnLrSZJEkDFoGeT3OZtYKlBDHN5A6nRVQmeTnPFa3NaT9y9nhaut3JPfJ3ABTFtXyN1KAtt8UxK1knXEtw7F403vowOklUCAXYpRWgx0LvAuOEXyyrIfQU9vwMzqErvuNAuyoSXKUla65VRSuHnXey3MLUTjLeSyWkZEhx2TcjaA5ceauRBWdvLBmKMc8UENCu9qpoKqKwAp2QW1Tus72FVZ00VvCL4Eb7C3VcNBt3pxjNIMYnuczxi1zsbLt2AMWp2pEtzXuxYH4wimJH6uS2VG5639vvNqHgbHy88bZnDiG4DuQxa3yfVzu94z4tZIooVTbgQxEtWzVH9dxKp7DYTGNV0TMEAmSIF50hBdKkCeEbpXrUiFBUEu2yVX7VreaPnLx9JwQ4FXRf0vHLamgs6dubek3vaMPHAonPlgPhBwfijRRFFIpugZqD1HQX9dhvKjE5sE0lPFrasduWeoWF7YIfqvfGNpQhi3nnV3kPXy4H4yATf7CkNzpjOC5a1JoZewQ3CbV4geXaQWOR7JwMaCB83hJKMootOUu5hMAI1SRynIL5itNB4CkaN8HmdoqllGB1vdqkSKTrU2XrrWsCtY3DKheCJHA0U1V1e9zaxd5851Th6fSOS2M3EWJ7FRz4Ci4oMtDoj8IG8kSB5P1CjGhcHc3ZHkLGhi4pBVfXiqs0vBsHeCEqGgFxIqIOgaKONLgAwrb2DD9p1oHM1gV5IOf4NDses0xpRXCDAIIcHp0SNetMu7AQQHpexffJmbwCMzvJSGWXFYrvKJbldRHDVhJwZg2CeTd8mGABvFZs8XMSynxkCws7PIchFvqK9TTZ0RwCEVwb2BuofODHRQjCd0L9VLXz1nujsZ3B5MV06N6p9FpzOiFnVCUQweCCtYQ1lrLdQxIB7HZO26QlPzwiD8yCYVVdCSyeAuxXNwpw8JQLSIH
QEmLtmjyAoRemXfV9UHmuam3BpULtuN2brR4PH4oKiB79ovNu8dIPJEjNEFxoEMdm2Cc3WHzohGbLvCLgEbtfWGvMbja7UQCARqsZlYfiq5ozJBr08FqXPXhGWkEG94uSQT8SIt1CZ5y1gyF0V4PkM0thO9rhbSi4zq5xef9XDCTVsLabzZyEvHIqRv8RziT4Wcu8YpJwxyMLV1ihKaueCczjRo6yqW8yFBwb9DWmR3aWGCUewMi7nx71RYmhLBFJDNYyq5UH4R5DzHlkD1acnHC04LB0MBTkopOhKJnsx33HVvg6yBePwr6rsqjaFIb2g4awWg80Z2vnNlXZkCbZZpxrFLPzuERKaGAjkCcNJ5ZiuQoVisSgcHlgVYB1ZEmTXVprz1oCru7kDQzfmynlaX0j0tojOHfSIedG3VMZh2OY9kQ230oIYgiwHxxF6X0zjlu0foOk2QmLSTziQSwSNNPqUNnMiuK05GDpEisYtGzfqPRKQsogTAtOJxxWlur9MKdy55U4Yb2K7j06OFF2XDAV9bdijlBNiKsz1UMaeoDBKYkwE7NEhyT62ODKbaJ9TSlhqR8vzxS8bAWNmQoked49Aw2PluouBmQM0E5adu041Ea0qI0Re6frU8tqpKC0kP9WklFeXxKMf8NeRyNrRRaTCTPJ4zLAcxbLhJSlxCu4W7NkmqgTNXg62l905ngBTeoLljVkJjFE3jv2v1srtr0qsihAzFT2NxaY0u9hyn7LP7P6dazmaogTZW9Kvj52pHPhSL6xxWxcNrw1Sduh0TnLg1bWvIZFV3SURXAMFsSmJulW03ntb8YxpU9tnttiu3JDPPZvrvaU5s3tfsvBWml6FoCidLf5DQNPQe78SZvzFVqjKGEyNChTjfNQEkblAwvIyby2LNe83ipnprNOn14p9icUDYOBfyDVFiazcSkvKMhCktYtMN3BaTLqAe0dJiTuvsQtOBOKaPTwvt7tw4zj669SLefmMZiM1600mgmefb6FUYUnfYeC52De1XHs1oQmdWIX4Pt0OOO8nWd8zuO6GayayNWbyVq39pgGHDG2ep0B1ukGtreB3YwvPjHjJwbO71Ut3Q2wg2sRhKYNrF7f6Eo6aCLWEZdbeZ8UdETslsH5egmXnej6AbLMgyr8LyJeySOlDzSppKiVLDLcJGAIPiIm6J4LB8FFUhv5xRUUbWfVM9M89l4IPtLxLwBjrUPuFgXg3Yy6wt1AgA2P30sOagRLbWTNOFSGSKB1hBtzl66n3UBrVOqJGrqwHJ7I5OoPUsQPqPrCEdpnNbsDYX6TDu1k2RdaPo8gdKHH8fUKbLiMYmpAPICCkBm1qDPWIpxtYW9Ghxu6yrOAHv5tXD1uc1tWYBzDETPsnBSSgJA28GvSdJWjmMPF7Nho6nk6EpuAc5FfNPROsPb2OMeD9smpfKYf7Xj98KxxA6MkV3u2TySTPXSJFxDZKevS1B24uG1zs4y4Ec1wVIrHFQUyiBHnBfbKCi3GqfinnfoPXGCTZhMdzXjcDciYC6GCVToI4mbPboB3sIvtBzMRUKOLUfnLgavGzawxFiynOyq0b9KNJuiD4xKNqNcAhlBFsbImJAI3WZVpj7MVOyxkPgb5Xjc8aLqzeNd7mdCnP36TILabQF2eKZXpWCSHH4vuIF4srlyc9Q3w67CRf3NeBw1gkNAYtN0YAyDiDPtweRTQwEP8jVe8TklmmtNRkCMczpgWpVmCPD4ybj0GeSbQSYACrrlMUHJNZFOo8bH2sbRgX345ARDamYrYwi4SXyf5JMmiXvTEyj6kFm3KnIGWKs0H5Ju7d6JT2Qrwb2uuBNu9oZusqAuxD5KGmPcdrUMFrrl932MbrhRXw8Bew4Ofn9AlkPpmb5KjdM5tdr22uU6Ozkk6mapL73EFykln66fuyHVmgKtdC2lTYyxaNdP5PEyYlVuD0KLlyuTXsrtua3LxcaHwGvXvaaswQKcJHplh0VE6NSEHsJbZZE7K9R5hqFt5Fiwwi2eO7Kp2HPcdcImhqiTGzDUpfQD47AEd92ir2MHCyqJjOQbgOjJehvqBUrp3bmMKn3A5iEt3d5RjFIpDw2KZrTjlUDRdjUOBk8hjcmIGfr6rrRFWvvWWg40EIMvf3bCOKTse2oDwYYE1SO6qhV6IyyZLtnLRxespLgApkJAQ9gKef4uolKCndvVvF58xjOVmkC9nfGwVK7uaOgjE1eeM2PtQhMf2TWLgt9LJwx1Ufereoocb8LJsLwRWyBG8VDSkW4dqxUy8coX15ZtEkfgfDXRoHPxb40REgiQC9iLftPZX4ZbV7KHzitDGrcuZ0zLeJm1z86lMckfnqOP5FOBeLs20VWlq3vUkDEaWmsJxUhKSwVYo24nF9jegPSeFeQlGPHP0aPBFAvJyuyweP6hUQnnVTYpZXmUOSilveYh8CwTiKgpsZO8WpRuPHjsxWfPfbeme0jYw8vWd1SYOefQ8czXe9DKMRryh12wezCaDMvlhp1ZPBoheE5wa69x1rapBHF7384tVWvWwCNlbb190V2iI2yz40Ijr7qbWGOi9O3SKu5vet8mLTZPNtIBOseqTTQXSxdqspfZZ1nYxIHqCr8NbgrkKL9e8uwt5FOo0ZuEhuQ1KPf5mSvQeT1gE4AZz9Mfz8wshriUbRgDc4sZh1mTNqRAjRDX0x8pcpP3nRHykckqWwlHcJcKZIVmRdavWAV7i7LiMs0mGD35kofTQmApqUOq1cq2VZ4rkF2xSz3uuonfCBXSEWjboTSgqGZCZP43hsCZ9ZpREZWncFXu508FLtvXQla6McHIAmBN0LilfLxn1ooyyUP9QnUSjqrh3GUwLHAWBX4MjQyC3FKxytGSegzZtdZDUTaet3o518Utnr2EKesawJehAC5VjQ7sDUskz9YToTxIfIQS7vnlOzWFsgRah5gEzvkxT16CLPJjo6E4FL8qPmXcnZzn4qwAucQClvkdQbGek2GrIBo7P3GrljF9kVTFga7iqFwFd0vdabFhH2csilo576kUgR53xlAMZEtS0KfyDP5yHpoGflRwj4PAY4BBtzf2R2jJUPoKc0mT7TDNqllgucYfQiFMFpkeAPdwFx4BPzZCCrA3kPpWkHl0ac2B3xuGvLZ5ut4ajawJ4MpTcU0aPgNqrxVSeyssuijMbnowifoghW45c71BRFRfkgUy5XKW8f4FX4DOysCQfAMi7pgiZkqtLUQZAlEYp3F0w01vYm5nVozxyfKSVuE0ouEhIGgu3iA9MNRv7qfss8VsQhjAmBbJzGi0DfPlyyxCdl4PjfxUN1qPo2NV0CO7gW6Fwx892VR31xzPtQCmicZWUGoiA7916iQzC1oiFARhQLX6NVLE3PPdQZmyo4J1C34XEQiZwlWcgtGmCWxJEykF9pWpIdLMAk3SqEA8PVDyGZtyvKLrkypYiibT4rJJCaJhmBH9lStgOg9eBwtOOPG8k1NbtP9werHcAApeODk9j1CPR7STz1AfTdhAsmaXLO8YHjOhrTzAHs5muOHEWZEOdNmGUsxLPsymSPNMY6fBR2u
CxQI1tS0AMQEcnTORBH5SIlRLHhHWThXrfVlIeM8PsiyRDcXZuviKNQuj9xvYEIX4ImL8Fb76xeBTxJqtNL5xKBj9cdLAyv4Pvjg88mxMNkz4mIhRCSpqA3WnfsViUDoI4KsbJWoenp9bHKTihyAt7Yi8qCRqvrR6bEWs3PXSDEwgWyxTlzDFSJoHp6lQahf6av1VleUg87XWfaW3kAa0rkXCj19JMbMkWF3XzWP6v2AnpiDtYiGOoob6ZlJayvHeHhX24b8De0aRgbEBem73sE38tnTHzPRgje4NvCEuAkgjXZlMwUcWuy9Mh755I8DMcpzMiOQdh6SNJFy8jdCFsZbczuk8zzk7NxPvszGFpFJcBZgzYK5rIjb3nunrzVgMWXhNu3et7ETsbRpRNu9xTBYro6sZ2kF4x4WOSkRk6j4dS4qC26kYZJYbfRt6CUL6OcJq7u3pMj8YS1d4eCVJaecXkyqk71AC5vJ7uww2HofXvBA9CKlK6izkzuiekvXLO09avV5BWqjs85WBu2dKSGaDVqV8KHMFvIEFxsBP15w9jCCJxMwmTPu3AXw2sf4O977lwvnkVY8DjUnx9lmrMVBit1R7vDyE3wXhvCROBwU4A6I1Zx8Xa7TJ4sje6fJDf7wiAupCB5jaXLTiz5hcoJlIvtsayv3SQC8fvmrPn8PaXmXXAoGGdGjQrpqqPq1QP7mjgGPJfawYckRfdS2G0xFeMSRrdlUngltQb2fDFTIPWXpxs6aq4S5fJodjLrweVfn17bgzyZ1UQoa7INcJba8or0xSaLUnLNkY3vyQHeZDIvFcVHcnliZQGyGZcUlGkQ7eQMMzz8BDLfLI47dGKlkH0hnwNp7YfeAm9oJyWXl3ok4gtFYpNeLkZ5jWzoJKov1iEY9ynvjrKNeURmt4HPBKikHgy9l2MV7QHJlK8fU1qi1I7Pd9oxuU87xLNd5vj1N4CKOPSr1h8umXx2yIH4dUYoCiHhzwBoKjmFPvBjVvfsbl23cEqqam2lKyGMiodna18UeKwop2d9vVAVLrty75egPMhcEdejEImFuPDLejtNMEUvgDrjspkepBFY27ucUDuXQqxCIhrk0MwleqAqWZcTrNChyTehDXhnlmd2VcU81XxI1PxrCTJdU5yZPWoD0S2bXCXTOS9iHQbTnKFL9qFp46HYrS5Sv5dBqJ2lXEIzW6UA4b1vbvigQtkPoQTSSsDXqqEfhllNYt6Ecxsvz62HOQfg2zJ2hhp8uviOLwhXCewJyfF0BNOkrKPn2cxgLQ1tx4VtJtu8hjEMBJmMjl7DIeuuKH82NUYQRZsu2KR5DIjkt0eTrgePyXnax9A3qKqurUcu5KDBE3wqqF0ckIqNxbBY8DR7bRV3MounR8UOgJjhdCicKDomoD5hVGM6fb22Si7z5Y0u7kDahVxieS8rG821w6NDTxL6Mh6XMoPF6EnDdSFkY8VXdWb6t4lxxwJHEPrELFTqkkEuNmQE1gVXx5BAyucuARaN1hvGBwFoFxs529LXmQv2jkRRUJMUwwKaduYJc1ib7qepocEbI6OfsPdZGj7tLO1I8XJTcv7B36B4lpoWibumvB89H3yl9RImqHTv7qZ8bvAItNj7VvQfCkimgWPxrvB384W4JRpfoxQeTxxM1zcYjJH3T6y1ve49DbYBJE18xoJitV0fUBm5p4eaPgDFsO98wrppWpfnFyAh2BjmVH1EneGxI3E86EQ3coVopGTZ7mHu2vFPb3y3MrMqoV5wDoU11OkQgE89LHkRxmNXYXvanyUQZa67sjn9ZfHpoSHoqYL7O9hlcLMbsCyCwiAzHQtCDIz0x43mYeRA7VbqOKIe3Rkls94jL0PqNS3K2EXX2Ncp0fIvdXa4Bm2q1ECEwvdFLAoPuUOJ5JMYcJPv9ye6pUzl4Sfwhja2qdbGQg2OhqrF3dnAY5NlqR892j8ShW8CWP9A3MuVnrsQ5zGRg37K0GbQDW8B3OU1js5jThXeR7D9oB4aDb1ugp2lVOPFsVw3U9zjifReiTmz9ovZalN2ne6sPEuSoZMRKBRj8txlqf7vwsoOBr3jKCH3uUAk5B1aihpv2kh2LEQ2dgZzAMawK5lZhrhPyWnKOEzGk2UghQWgAcAJMJR0c7JDGSvCloPQ6HcinLAg6NTapOVAULKgkxajn1eFCmn9H1GYXAXIYNmccMuS8qcDiGaZ4QvIBk6mEd2rh7ZkS6EM7BBh62eBHNrpUT5d0uAaAFUaUMZAPywalnAIFhtSyIouqFzsXtGSZZotNGzdNaT6VefnEXf74Z1VHgiUWdJBN7ahDUWZmQPADqCcWYywAMZjLfN2t5HmHW5TZN8F3nY8crS7Hu0Mpit5v3sVeVvT2c97wZSpsTQ1e9y25NqeVAZs2acGc6j5pt06yEmnU8TIIO3Nw2614Ww82uhLsRPvcO5vfEQ2HKPeWIHi2DZC8VDjiPaIEX50GNummPgYL77gmXNbx2U7g62SYm9a4NfkkLf8YtPbyZhoNULVZfKQolBbPGDgBjiWiQ51T0skkE7PWMTfutWw974knkykjO66iUV5QG9i0fYagBiU29Z6UgOjj4yq2kSGNzHYqj5ThjczQsYX1QtJ1JRmiz2fBdUuiDUkS2Zg5J8rY4kU5LBEgbErXbSakXLJkg40p9LtsEGXPugVeav6E7iJ6FS8AICWYwkcGMUjj58lQV0ewBhzsHgrD50uib1tjYIbbQJyxs5ARcwSLwthgRNU83UK43SZPQGVvHrd8Vaejxc5Ahj4D0oDyubGB71m9gc3iUshPbo6M8LyEmgZqvqkRGe1IqXnsT1VASlCmR0vQv2ATIyiaQPZD6Sar03ISDGYRaaaTHloSa54zjHhsbLSFGpYOVe9AQiLCA5vfEnDtknB0bVwMcieByFvF1f39cDxtpmrqOocup2hrzbWTgfEm5986Xd28DLBQC0FXmD4BVq2xIVYf8iIWhV1V7mmyLlxnGblUvgYd17CKQzLLIYsHf9ZecFAY4ggNQtRRKzB4YujRakSWRzeGw3daQOOkzEsLGpGiibuiqVr1TXswqBg49a6ny5CH9hMnQqRf4vUrPZZ609XYGa5XmVnAcUjVpnBUQJkwNXgl19qmYzQSyZemEQjQPDjfpOcWKYZDjsddUmPZHL09uaWxKasLNBxUZRwiH9en1P84aaLa3C3mzRTlh1G2AZQdjfMNB6daNIz95lBXi1ltCfxESrlBDd6cPRy8c1N7iVBcAMmcSBl6PXBbdKN92Ey4KBZuUEzQrLv9RPnn1bne9fnlHgu8zc5d9H4ji8csyfp7aPs73VJM17t6pRKjiYH8xZoXeMHTDNDsW0yWYQmlh266t6fDITa8aIf6uSNawnIRjNDiuGZ1P7Fu9N2IFdWwpFYff0jcXPljS79MFRmg0WEKQkhgx2GpsSko5WzlrJIneg49FJaAZmrsWBQIsCgkRrtW3gnb91NpKzGzvY8hahEThZWxkAVjZk2GOJ5ldZlpUja46SMpjalOJIU7892zRcSM8ryHkS0VadyFI1l2LUdxTC2
s9FlWP7elHRafTPEwQm9CDrCWoeh600Egd410BWVySzAFpmxiSUdrufAhKC9kQjSxRjUHOmgzpGeH5Pl75bBUgw6Ccnr80din8kExjwkMJoGL1IwFmpnFNqNvv9dc7ZNtNM7KT0Hi4fq20uLV7tOjUBb10M4k7Y9pdRlh1rU8dd3YFAAcqZH5xk4gEFrlcUaLPjP4ZdrmPnW83kcAowpd9XnPCTZLNklwWVQWZB6Q2DwCqOTytyXjCYOxPIWpW5yqrEl1KDpAAIcfAMqkMJro54M5bc6kujbWWmDcsqgqoqmfaqwniU65PDUg0ZeD0tQTNDN1HXlI1dZ102YVUCX5jQIvhJ2C65JyenagGbos49KO1SYKnD5ZcAXn1WxxaQbiHsELISeL8jdIZMc9PZoe2TohXdkuASwNbjIDukjqvfvbjTYK6pNmT3RnGKPDSkp2Aqu39ETz0QlbC8S9ZfUUboxe7jiXu7u8cUNGGBrnKLgAIxc9XAxNVLX7D4NIS4o9h59LNYoU3vulmT4wHeBXu4Wk0BdngAOKiclo0aDSwafuI1DHH0RTngmySR2o4HkTbK5Uycc25Fs1gexen0fe1DreWpvmfqwVKvSHI1bJyB2PayC9KHKpmzfiqp8fJqLrY6Z6QtLr0yZWwjlwXpUHz9fEO0cgc47aw15P7DKjCLifjspBJKwdan3uDVtousLplrBTVtHYTxhRaHanx0F80dHZl4veBThpX1AFGwdE9Zy72K4K28sG6NHgC9En7qBc2rtjTLE1irPoAg9JzZfRh2uWCjEEV2KWplZ5Cy5FSaHvGM8qtq2i6gzFkFQYh3SI9OkdHND1Yl0Kh3a7CoHVWmxPdMPdeF3P4HrY7w3IkLTW6KV6STC0qTTAnRrHWmzUGqsnipqKXBgmrkCxTJj2odfOmlr9EPPr0hNxk6P5GJgdjTdr9sDElo8PrUTBI9PX9hBm1w9yLfnExMsSV8lv9sseyWWRigNkZdI3lGL8CkYIYuhJVCSBaH5O8lrff7P0mqh71nHjL2AXbp5NQqLkrLO5dgiiks5rMSfub25WJgIFfZYLuxwuwRAxJj3Iwa5q1oH5vqFv9rFaWyBGPyMINNUv4l3xmASc6u2hinAorNK4udIXgSrZH1Qs6CSqUTfhyVIlTrCm5lrAeMHunjUMoeMHo9nmqNZpeAYmeuNpJaMILaCVoRcnWdoQ6fp2g1yWRoOfmKiXd1Oucy0utTfxMKvuKyxFAVvl0uxAB2AjkfEhrD5Ywi1p5irSamOK3YxpchsfKI4dcDtnrguKjEqEraytM0Jt9EoKwwsa9dThWUBs5NuS1AJa9tj50R3HG03N7vYHztdRQ92UMrfVqxr7LiGQlneshyZyndqDndCWU50pEBpA5iZf0R8Mrbep31V9TiJRac4peRr4Ia3uud8HdiQI7KDVN5z9ZC6WLlVnV0zCk2ffAfK2rbKSI9vyMSclaQ2npEE7kvlendrnKofQX9koODJLR46pjqvkDztngMDnkxWhXw5pZMaxAwrE41KsBWM1GyelbeHzY9QuRkL5irTXzPTjYctiJqsLxcsMJ1wYUKlAXLP7LZ5wVPjNO6bFkEFNWPF0Rv5IecsGAbry2gj2ANML28sZJfPrT5i0Em0oiTt2xmjYOJKPJMmno4xfk93P587aEp5nvSe7V7A5FEGN2S8fRlHYkSyVkJPFYvVds9T5WTamFOMPOOyU0uxfXxR9qVPHwe9LtGINJNV9LVGPOSLhO1CrdBNRByZwt1rQdy1WdCiQ2918MzyyrcfGQa3o0Vny7ItAVJuNRRfjScujeQJzkH7UxOGJzON2KgLUjxCY8dfiL0fgjru286ZovwvB6nV0no7BrI3HABD1UdFcxGvVhSJYGJenLbrmS8aSVQcR07S4O5jTxRbaOrKir3SI1a8yEdrfDRK1vyIO6yf1IltgXS6bXNDHdILgGm8VHbLX1GPjvlvGh12CbSVcGHK1S59DWrNwZcUOanyppUYMGHfOC1WZ0Fnhsl7cYADFJvK5ia2Mu0D7QIQnYZyp057CaqewLYUNYZVHD3WjEvYEqonjq2bJpdRZcmK64uPmuvvLK6DmvA35mcDsv7TyPjjbcBkUaQQou43nYciZH4Ok2HQJx1LEWrUXSVRkEGxKZJxLx3ceUjYiwnH7zC6nA3edYpyBE1RyjEIDO7rlL7cK8VIADnAzdaGHniske6LuC8E146JENXwRxGXXeJkQMufby4KG89A8rMudRKGVp5gpFcEc8hFoDSQ4gbVSqKy37fN6CrCOyRehFz3fXh2AQYBsn79lr2hzmGKTZObfGUjZLZ6BdMMtvzaljFNsxqXZPTezgNXTOsajIi02NNGyzMQM8rrBSA5PD347YIHiXBvYSZW0tIUv5F5EueOuXeZmpErOqbtwFlqDUx844JbijK2uNCouKLBlBNMKEO90nswkgFnNUyfxUOsvYSuULQ1bI4aAMw17LLc8h794vlG1qAqLB3jvaQlDZfnPAJkCBkcBXpSgHaHPfcYaRBQ8ZgfIGWzKLtRBNJrX8MmzHeuTT6GmIvQpCYImCo1MHpvcFzJhK3QoL9s78HOmsgBJfBSIu5cgyKLaUndH7gxvCepz6RGJu6hRtjbUcJz5TkWn1K0bNwZRnLuzJjudYC19QLGsfpaf6a46TgCdQFhCFxA2vqVdygrpBj9ydKbSfw4ODEbdTSbkAOJBthxCb0MVbFNMq8NtMBMtWSSJg0OfiU2CeFY1glEiUqYazd8i8uQG1wFZvGQDO8KRGIEvMnzqFaVF9eIiWobpg9fAygS9AhM0epNnCd7KrrRQxGwOwqz8I5a3cUFLeyRmFJQB2CSEl8pavEm6GrwzIjn3UTfRGUk3eXtMpqHq9fEN88iXDN2zbMehYZ4jOHNQ6bnDBHtYbnqN5lkY1TjlifuDZZIwZIpXnnzVOQx9oizb8KsDDZz1jxIH33WhU7qssqJLlQ1QCMvzwm4Et1V8WyuG2fEba5hJOW8AsnCRF7CerLrLNYeZxOKn6y33RUxnIXeZMtPQXV8eOn00ymgsl3JzcqJtILDZTZjjcUyRuk5Fgn0l9Mo1ZPgfrB12Ezq8W3KDUyHoEo5oVfJrnoaTXK3qSyfGszmLGy38Vz0b15pbMxoMldr6m74SAArl5Nun9l60B4Lse3j3Mka5l9nc7aZ2vvqSBIQNps4EeD6eukvGC8M5IJL7xW4gVL7mukkiXts1K5S4UGFEslJcjTyg3HUsXgTPRaPQeFfXpGsU35XGfb8wNYSdzxClkeJNRUkdz72kGl0LOKsyGBXviPkIbj1gWx3F6t6B6egdEszzMhq8qGRTz3sINMsTMSZHM9qc338I56DiZBNhvDavg5luKbBcFccOTNMQ1F8uEIf1pHxJKkcQQLqweiB3FRYCAztkxtRQToSpPDr8ZcaNJ7bk2tdKeQFBKjKJh8HUfLwADHdSKoqvj0dOzAivnx1TlMX46e6LJb9sYoZ9t06bNVd32oSHIXfRdP0Uyrr2YQpnUMSYxMgSWlqshjwiFUy6K6IuOq
HJTUQmOvPDDoViytaRKQoPFGj8awx1udVZunsP9zjW7KVzAerP77x32uWUCllCvnhJP1MbIlAX99FgzodRG3OkoxwaOzjeZTHlEcqDUGxLnWI93t0OrnDyeaxFN8JQTsJ0YevI2GDVDU0WLoQAkOiwlOV7sRJoRslgAkS7jCM45ueV5Cidr2I5Ag7cQgfHJq0Gu9kbOexoEh5mzil5Ufhpkh6TvHZ1xVNQyTfq5rLm0pCDHJGKYmw2vZqUcrEiSyXBzuJahVTd4ORXS1nUa7OuZjFmS1EwfyUyHbxAMNfHd0PD5e3lu4kw1Log5kR2AqDUBhk59nMWpx2nSCHfeilfLhJlyBmvJHHVBgw7ypZQqeocDzfCj3KB1uV6NooS8N94v7aAcsIBPHeiRzY546VpnJpUB9JlFLaxvZ9VtfRd6saxUbRo7UPwqrvPmnNG5l5LADBEmzPeGLN504HQWP6zY5UZJP20DsQhmv4oaMGtoKHpf9W4BJYKG7izgSxx4fPduEnhuDbh0qWbPpBSZ2FLMqXcz0Mb33kMTfFMQfcommP2LFjl1mNg1gj1zlMoW9ewX1Orh2iYnfKF5omaTtHhEgpTANtQSnk2gUwladOip11URh8tljDpByDGlMZabVwuGiSXNs95DR4mTcdbKPxe9saFc57OYh9F9Zgn9XytjR96ryFgluGebLvVCYwKt6zVR4y3dwboMy0NYl30uO4hN7SLELouzcthC34edRNqLAwfguiQ7DKVAwH4Y9U1Fws240b95SKIT5ofq65UBHCePEMLtunOZxMplO7WD4igx0binu0lNoX2jwAANh6AajPkrjJ9EvZv8aFdsJInBCoquOMuUKTvH7juN6t5kL79aBGSmcxAaAzbMoLOsd55V7ZBXD6NVrUHPjm8MKmSlPZXq9GBOfOHYVbeEpqVb6mCwK30LC12TB4XMtaqtQgK3OUAw3zNbAhqW98Ixp3SaWVcc767Q186AaXXaFCunAD19i8q7hT52OosgxKj1eQQxI3boIMFAV4pc1mO2btXNu7Gu4oIKDKLUe2ypYS91H7VOxtYaOGx9mBOg9lctyZrzgbUvls3UeACSP0U2wBvEaZQi70Xa9ujlwsCViR0X8I8JxrR8TgjWhHn2gjxIdqWmhbndyobvHmgfn7NjxvMKWaUy4LQo6dCXBzrQQBHRermorYyxUjpdKTGZq0e872wwIRgaBOENi6crIzsRjxgaeXGJLsOlMkqhaRSAN6pdWx7NQsxHf6bau4VdzWI5Y3nRIyKrvEcrySwUZyK0et2BaANEGqf0WBrhsPGE1kYUQhz4dNpS6MagXwC31QdShxyak5JaqPe2VYm4EJjag3DwFvDZXZxVPQIBukXLUgsfuEBpVXU0BAQyxX5SOw6bFR1Ug6zPRoM3nQJ8jFBlz5lFRLghEFri9Lmcz3qIXWMokMLCBHCBkuUZDJP7CP18HZcQ5keLuCbPcAVKJ4wQ6lXns05K8M2FtYrQSx3noFfFgVQpCH9TCdaALSN3wg1rMfS0aI57qZAazfyQcitZBH2TwXDOFWxRmmrAjZRDrAvTl6nBEV3kORQ02jUsvnSKhekPghslUWYv1WJBslvwJUmyBd8R6sKGtWvQWxANogNDzDMemfCJtdN5n1aJo2eOVKdPMZe7GNBJ4rQ2RJXKP0nZwhOvktcFtuifUT2H7fXDvJEsE7sRh2jtpzfuyiLzf4ayGTRLG68TsHcPqGItZtMmVfiDJOzg136oIRnsh3ol5uxM
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/test_hash.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Unit tests for hash command."""
+
+import os
+
+from gslib.exception import CommandException
+import gslib.tests.testcase as testcase
+
+
+class TestHash(testcase.GsUtilUnitTestCase):
+ """Unit tests for hash command."""
+
+ _TEST_FILE_CONTENTS = '123456\n'
+ _TEST_FILE_B64_CRC = 'nYmSiA=='
+ _TEST_FILE_B64_MD5 = '9EeyCn/L9TpdW+AT6gsVrw=='
+ _TEST_FILE_HEX_CRC = '9D899288'
+ _TEST_FILE_HEX_MD5 = 'f447b20a7fcbf53a5d5be013ea0b15af'
+
+ def testHashContents(self):
+ tmp_file = self.CreateTempFile(contents=self._TEST_FILE_CONTENTS)
+ stdout = self.RunCommand('hash', args=[tmp_file], return_stdout=True)
+ self.assertIn('Hashes [base64]', stdout)
+ self.assertIn('\tHash (crc32c):\t\t%s' % self._TEST_FILE_B64_CRC, stdout)
+ self.assertIn('\tHash (md5):\t\t%s' % self._TEST_FILE_B64_MD5, stdout)
+
+ def testHashNoMatch(self):
+ try:
+ self.RunCommand('hash', args=['non-existent-file'])
+ self.fail('Did not get expected CommandException')
+ except CommandException, e:
+ self.assertRaisesRegexp(e, r'No files matched')
+
+ def testHashCloudObject(self):
+ try:
+ self.RunCommand('hash', args=['gs://bucket/object'])
+ self.fail('Did not get expected CommandException')
+ except CommandException, e:
+ self.assertEquals('"hash" command requires a file URL', e.reason)
+
+ def testHashHexFormat(self):
+ tmp_file = self.CreateTempFile(contents=self._TEST_FILE_CONTENTS)
+ stdout = self.RunCommand('hash', args=['-h', tmp_file], return_stdout=True)
+ self.assertIn('Hashes [hex]', stdout)
+ self.assertIn('\tHash (crc32c):\t\t%s' % self._TEST_FILE_HEX_CRC, stdout)
+ self.assertIn('\tHash (md5):\t\t%s' % self._TEST_FILE_HEX_MD5, stdout)
+
+ def testHashWildcard(self):
+ num_test_files = 2
+ tmp_dir = self.CreateTempDir(test_files=num_test_files)
+ stdout = self.RunCommand('hash', args=[os.path.join(tmp_dir, '*')],
+ return_stdout=True)
+ # One summary line and two hash lines per file.
+ num_expected_lines = num_test_files * (1 + 2)
+ self.assertEquals(len(stdout.splitlines()), num_expected_lines)
+
+ def testHashSelectAlg(self):
+ tmp_file = self.CreateTempFile(contents=self._TEST_FILE_CONTENTS)
+ stdout_crc = self.RunCommand('hash', args=['-c', tmp_file],
+ return_stdout=True)
+ stdout_md5 = self.RunCommand('hash', args=['-m', tmp_file],
+ return_stdout=True)
+ stdout_both = self.RunCommand('hash', args=['-c', '-m', tmp_file],
+ return_stdout=True)
+ for stdout in (stdout_crc, stdout_both):
+ self.assertIn('\tHash (crc32c):\t\t%s' % self._TEST_FILE_B64_CRC, stdout)
+ for stdout in (stdout_md5, stdout_both):
+ self.assertIn('\tHash (md5):\t\t%s' % self._TEST_FILE_B64_MD5, stdout)
+ self.assertNotIn('md5', stdout_crc)
+ self.assertNotIn('crc32c', stdout_md5)
+
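The md5 constants above can be reproduced with the standard library alone (the crc32c
values would additionally need the crcmod package gsutil bundles). A quick check,
assuming Python 2 as used by gsutil at this revision:

    import base64
    from hashlib import md5

    digest = md5('123456\n').digest()
    print 'hex md5:    %s' % md5('123456\n').hexdigest()   # _TEST_FILE_HEX_MD5
    print 'base64 md5: %s' % base64.b64encode(digest)      # _TEST_FILE_B64_MD5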
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/test_hashing_helper.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,246 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Unit tests for hashing helper functions and classes."""
+
+from __future__ import absolute_import
+
+from hashlib import md5
+import os
+import pkgutil
+
+from gslib.exception import CommandException
+from gslib.hashing_helper import CalculateMd5FromContents
+from gslib.hashing_helper import HashingFileUploadWrapper
+import gslib.tests.testcase as testcase
+from gslib.util import StorageUrlFromString
+from gslib.util import TRANSFER_BUFFER_SIZE
+
+
+_TEST_FILE = 'test.txt'
+
+
+class TestHashingFileUploadWrapper(testcase.GsUtilUnitTestCase):
+ """Unit tests for the HashingFileUploadWrapper class."""
+
+ _temp_test_file = None
+ _dummy_url = StorageUrlFromString('gs://bucket/object')
+
+ def _GetTestFile(self):
+ contents = pkgutil.get_data('gslib', 'tests/test_data/%s' % _TEST_FILE)
+ if not self._temp_test_file:
+ self._temp_test_file = self.CreateTempFile(
+ file_name=_TEST_FILE, contents=contents)
+ return self._temp_test_file
+
+ def testReadToEOF(self):
+ digesters = {'md5': md5()}
+ tmp_file = self.CreateTempFile(contents='a' * TRANSFER_BUFFER_SIZE * 4)
+ with open(tmp_file, 'rb') as stream:
+ wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
+ self._dummy_url, self.logger)
+ wrapper.read()
+ with open(tmp_file, 'rb') as stream:
+ actual = CalculateMd5FromContents(stream)
+ self.assertEqual(actual, digesters['md5'].hexdigest())
+
+ def _testSeekBack(self, initial_position, seek_back_amount):
+ """Tests reading then seeking backwards.
+
+ This function simulates an upload that is resumed after a connection break.
+ It reads one transfer buffer at a time until it reaches initial_position,
+ then seeks backwards (as if the server did not receive some of the bytes)
+ and reads to the end of the file, ensuring the hash matches the original
+ file upon completion.
+
+ Args:
+ initial_position: Initial number of bytes to read before seek.
+ seek_back_amount: Number of bytes to seek backward.
+
+ Raises:
+ AssertionError on wrong amount of data remaining or hash mismatch.
+ """
+ tmp_file = self._GetTestFile()
+ tmp_file_len = os.path.getsize(tmp_file)
+
+ self.assertGreaterEqual(
+ initial_position, seek_back_amount,
+ 'seek_back_amount must be less than initial position %s '
+ '(but was actually: %s)' % (initial_position, seek_back_amount))
+ self.assertLess(
+ initial_position, tmp_file_len,
+ 'initial_position must be less than test file size %s '
+ '(but was actually: %s)' % (tmp_file_len, initial_position))
+
+ digesters = {'md5': md5()}
+ with open(tmp_file, 'rb') as stream:
+ wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
+ self._dummy_url, self.logger)
+ position = 0
+ while position < initial_position - TRANSFER_BUFFER_SIZE:
+ data = wrapper.read(TRANSFER_BUFFER_SIZE)
+ position += len(data)
+ wrapper.read(initial_position - position)
+ wrapper.seek(initial_position - seek_back_amount)
+ self.assertEqual(wrapper.tell(),
+ initial_position - seek_back_amount)
+ data = wrapper.read()
+ self.assertEqual(
+ len(data), tmp_file_len - (initial_position - seek_back_amount))
+ with open(tmp_file, 'rb') as stream:
+ actual = CalculateMd5FromContents(stream)
+ self.assertEqual(actual, digesters['md5'].hexdigest())
+
+ def testSeekToBeginning(self):
+ for num_bytes in (TRANSFER_BUFFER_SIZE - 1,
+ TRANSFER_BUFFER_SIZE,
+ TRANSFER_BUFFER_SIZE + 1,
+ TRANSFER_BUFFER_SIZE * 2 - 1,
+ TRANSFER_BUFFER_SIZE * 2,
+ TRANSFER_BUFFER_SIZE * 2 + 1,
+ TRANSFER_BUFFER_SIZE * 3 - 1,
+ TRANSFER_BUFFER_SIZE * 3,
+ TRANSFER_BUFFER_SIZE * 3 + 1):
+ self._testSeekBack(num_bytes, num_bytes)
+
+ def testSeekBackAroundOneBuffer(self):
+ for initial_position in (TRANSFER_BUFFER_SIZE + 1,
+ TRANSFER_BUFFER_SIZE * 2 - 1,
+ TRANSFER_BUFFER_SIZE * 2,
+ TRANSFER_BUFFER_SIZE * 2 + 1,
+ TRANSFER_BUFFER_SIZE * 3 - 1,
+ TRANSFER_BUFFER_SIZE * 3,
+ TRANSFER_BUFFER_SIZE * 3 + 1):
+ for seek_back_amount in (TRANSFER_BUFFER_SIZE - 1,
+ TRANSFER_BUFFER_SIZE,
+ TRANSFER_BUFFER_SIZE + 1):
+ self._testSeekBack(initial_position, seek_back_amount)
+
+ def testSeekBackMoreThanOneBuffer(self):
+ for initial_position in (TRANSFER_BUFFER_SIZE * 2 + 1,
+ TRANSFER_BUFFER_SIZE * 3 - 1,
+ TRANSFER_BUFFER_SIZE * 3,
+ TRANSFER_BUFFER_SIZE * 3 + 1):
+ for seek_back_amount in (TRANSFER_BUFFER_SIZE * 2 - 1,
+ TRANSFER_BUFFER_SIZE * 2,
+ TRANSFER_BUFFER_SIZE * 2 + 1):
+ self._testSeekBack(initial_position, seek_back_amount)
+
+ def _testSeekForward(self, initial_seek):
+ """Tests seeking to an initial position and then reading.
+
+ This function simulates an upload that is resumed after a process break.
+ It seeks from zero to the initial position (as if the server already had
+ those bytes). Then it reads to the end of the file, ensuring the hash
+ matches the original file upon completion.
+
+ Args:
+ initial_seek: Number of bytes to initially seek.
+
+ Raises:
+ AssertionError on wrong amount of data remaining or hash mismatch.
+ """
+ tmp_file = self._GetTestFile()
+ tmp_file_len = os.path.getsize(tmp_file)
+
+ self.assertLess(
+ initial_seek, tmp_file_len,
+ 'initial_seek must be less than test file size %s '
+ '(but was actually: %s)' % (tmp_file_len, initial_seek))
+
+ digesters = {'md5': md5()}
+ with open(tmp_file, 'rb') as stream:
+ wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
+ self._dummy_url, self.logger)
+ wrapper.seek(initial_seek)
+ self.assertEqual(wrapper.tell(), initial_seek)
+ data = wrapper.read()
+ self.assertEqual(len(data), tmp_file_len - initial_seek)
+ with open(tmp_file, 'rb') as stream:
+ actual = CalculateMd5FromContents(stream)
+ self.assertEqual(actual, digesters['md5'].hexdigest())
+
+ def testSeekForward(self):
+ for initial_seek in (0,
+ TRANSFER_BUFFER_SIZE - 1,
+ TRANSFER_BUFFER_SIZE,
+ TRANSFER_BUFFER_SIZE + 1,
+ TRANSFER_BUFFER_SIZE * 2 - 1,
+ TRANSFER_BUFFER_SIZE * 2,
+ TRANSFER_BUFFER_SIZE * 2 + 1):
+ self._testSeekForward(initial_seek)
+
+ def _testSeekAway(self, initial_read):
+ """Tests reading to an initial position and then seeking to EOF and
back.
+
+ This function simulates an size check on the input file by seeking to
the
+ end of the file and then back to the current position. Then it reads to
+ the end of the file, ensuring the hash matches the original file upon
+ completion.
+
+ Args:
+ initial_read: Number of bytes to initially read.
+
+ Raises:
+ AssertionError on wrong amount of data remaining or hash mismatch.
+ """
+ tmp_file = self._GetTestFile()
+ tmp_file_len = os.path.getsize(tmp_file)
+
+ self.assertLess(
+ initial_read, tmp_file_len,
+ 'initial_read must be less than test file size %s '
+ '(but was actually: %s)' % (tmp_file_len, initial_read))
+
+ digesters = {'md5': md5()}
+ with open(tmp_file, 'rb') as stream:
+ wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
+ self._dummy_url, self.logger)
+ wrapper.read(initial_read)
+ self.assertEqual(wrapper.tell(), initial_read)
+ wrapper.seek(0, os.SEEK_END)
+ self.assertEqual(wrapper.tell(), tmp_file_len)
+ wrapper.seek(initial_read, os.SEEK_SET)
+ data = wrapper.read()
+ self.assertEqual(len(data), tmp_file_len - initial_read)
+ with open(tmp_file, 'rb') as stream:
+ actual = CalculateMd5FromContents(stream)
+ self.assertEqual(actual, digesters['md5'].hexdigest())
+
+ def testValidSeekAway(self):
+ for initial_read in (0,
+ TRANSFER_BUFFER_SIZE - 1,
+ TRANSFER_BUFFER_SIZE,
+ TRANSFER_BUFFER_SIZE + 1,
+ TRANSFER_BUFFER_SIZE * 2 - 1,
+ TRANSFER_BUFFER_SIZE * 2,
+ TRANSFER_BUFFER_SIZE * 2 + 1):
+ self._testSeekAway(initial_read)
+
+ def testInvalidSeekAway(self):
+ """Tests seeking to EOF and then reading without first doing a
SEEK_SET."""
+ tmp_file = self._GetTestFile()
+ digesters = {'md5': md5()}
+ with open(tmp_file, 'rb') as stream:
+ wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
+ self._dummy_url, self.logger)
+ wrapper.read(TRANSFER_BUFFER_SIZE)
+ wrapper.seek(0, os.SEEK_END)
+ try:
+ wrapper.read()
+ self.fail('Expected CommandException for invalid seek.')
+ except CommandException, e:
+ self.assertIn(
+ 'Read called on hashing file pointer in an unknown position',
+ str(e))
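
The invariant these wrapper tests assert is that hashing a file through buffer-sized reads yields the same digest as hashing it in one pass. A minimal standard-library sketch of that check (the 8 KiB chunk size is an arbitrary stand-in for TRANSFER_BUFFER_SIZE):

import hashlib

def chunked_md5(path, chunk_size=8192):
    # Hash the file in fixed-size chunks, as the wrapper does during an upload.
    digest = hashlib.md5()
    with open(path, 'rb') as fp:
        for chunk in iter(lambda: fp.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

# chunked_md5(path) matches the digest of a single full-file read, which is
# what CalculateMd5FromContents computes in the assertions above.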
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/test_mb.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Integration tests for mb command."""
+
+from __future__ import absolute_import
+
+import gslib.tests.testcase as testcase
+from gslib.tests.testcase.integration_testcase import SkipForS3
+from gslib.tests.util import ObjectToURI as suri
+
+
+class TestMb(testcase.GsUtilIntegrationTestCase):
+ """Integration tests for mb command."""
+
+ @SkipForS3('S3 returns success when bucket already exists.')
+ def test_mb_bucket_exists(self):
+ bucket_uri = self.CreateBucket()
+ stderr = self.RunGsUtil(['mb', suri(bucket_uri)], expected_status=1,
+ return_stderr=True)
+ self.assertIn('already exists', stderr)
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/test_parallel_cp.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,220 @@
+# -*- coding: utf-8 -*-
+# Copyright 2010 Google Inc. All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish, dis-
+# tribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the fol-
+# lowing conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+"""Tests for parallel uploads ported from gsutil naming tests.
+
+Currently, the mock storage service is not thread-safe and therefore not
+suitable for multiprocess/multithreaded testing. Since parallel composite
+uploads necessarily create at least one worker thread outside of main,
+these tests are present in this file as temporary (slower) integration tests
+to provide validation for parallel composite uploads until a thread-safe
+mock storage service rewrite.
+
+Tests for relative paths are not included as integration_testcase does not
+support modifying the current working directory.
+"""
+
+import os
+
+import gslib.tests.testcase as testcase
+from gslib.tests.util import ObjectToURI as suri
+from gslib.tests.util import PerformsFileToObjectUpload
+from gslib.util import Retry
+
+
+class TestParallelCp(testcase.GsUtilIntegrationTestCase):
+ """Unit tests for gsutil naming logic."""
+
+ @PerformsFileToObjectUpload
+ def testCopyingTopLevelFileToBucket(self):
+ """Tests copying one top-level file to a bucket."""
+ src_file = self.CreateTempFile(file_name='f0')
+ dst_bucket_uri = self.CreateBucket()
+ self.RunGsUtil(['cp', src_file, suri(dst_bucket_uri)])
+
+ lines = self.AssertNObjectsInBucket(dst_bucket_uri, 1)
+ self.assertEqual(suri(dst_bucket_uri, 'f0'), lines[0])
+
+ @PerformsFileToObjectUpload
+ def testCopyingMultipleFilesToBucket(self):
+ """Tests copying multiple files to a bucket."""
+ src_file0 = self.CreateTempFile(file_name='f0')
+ src_file1 = self.CreateTempFile(file_name='f1')
+ dst_bucket_uri = self.CreateBucket()
+ self.RunGsUtil(['cp', src_file0, src_file1, suri(dst_bucket_uri)])
+
+ lines = self.AssertNObjectsInBucket(dst_bucket_uri, 2)
+ self.assertEqual(suri(dst_bucket_uri, 'f0'), lines[0])
+ self.assertEqual(suri(dst_bucket_uri, 'f1'), lines[1])
+
+ @PerformsFileToObjectUpload
+ def testCopyingNestedFileToBucketSubdir(self):
+ """Tests copying a nested file to a bucket subdir.
+
+ Tests that we correctly translate local FS-specific delimiters ('\' on
+ Windows) to bucket delimiter (/).
+ """
+ tmpdir = self.CreateTempDir()
+ subdir = os.path.join(tmpdir, 'subdir')
+ os.mkdir(subdir)
+ src_file = self.CreateTempFile(tmpdir=tmpdir, file_name='obj', contents='')
+ dst_bucket_uri = self.CreateBucket()
+ # Make an object under subdir so next copy will treat subdir as a subdir.
+ self.RunGsUtil(['cp', src_file, suri(dst_bucket_uri, 'subdir/a')])
+ self.RunGsUtil(['cp', src_file, suri(dst_bucket_uri, 'subdir')])
+
+ lines = self.AssertNObjectsInBucket(dst_bucket_uri, 2)
+ self.assertEqual(suri(dst_bucket_uri, 'subdir/a'), lines[0])
+ self.assertEqual(suri(dst_bucket_uri, 'subdir/obj'), lines[1])
+
+ @PerformsFileToObjectUpload
+ def testCopyingAbsolutePathDirToBucket(self):
+ """Tests recursively copying absolute path directory to a bucket."""
+ dst_bucket_uri = self.CreateBucket()
+ src_dir_root = self.CreateTempDir(test_files=[
+ 'f0', 'f1', 'f2.txt', ('dir0', 'dir1', 'nested')])
+ self.RunGsUtil(['cp', '-R', src_dir_root, suri(dst_bucket_uri)])
+ src_tmpdir = os.path.split(src_dir_root)[1]
+
+ lines = self.AssertNObjectsInBucket(dst_bucket_uri, 4)
+ self.assertEqual(suri(dst_bucket_uri, src_tmpdir,
+ 'dir0', 'dir1', 'nested'), lines[0])
+ self.assertEqual(suri(dst_bucket_uri, src_tmpdir, 'f0'), lines[1])
+ self.assertEqual(suri(dst_bucket_uri, src_tmpdir, 'f1'), lines[2])
+ self.assertEqual(suri(dst_bucket_uri, src_tmpdir, 'f2.txt'), lines[3])
+
+ @PerformsFileToObjectUpload
+ def testCopyingDirContainingOneFileToBucket(self):
+ """Tests copying a directory containing 1 file to a bucket.
+
+ We test this case to ensure that correct bucket handling isn't dependent
+ on the copy being treated as a multi-source copy.
+ """
+ dst_bucket_uri = self.CreateBucket()
+ src_dir = self.CreateTempDir(test_files=[('dir0', 'dir1', 'foo')])
+ self.RunGsUtil(['cp', '-R', os.path.join(src_dir, 'dir0', 'dir1'),
+ suri(dst_bucket_uri)])
+
+ lines = self.AssertNObjectsInBucket(dst_bucket_uri, 1)
+ self.assertEqual(suri(dst_bucket_uri, 'dir1', 'foo'), lines[0])
+
+ @PerformsFileToObjectUpload
+ def testCopyingFileToObjectWithConsecutiveSlashes(self):
+ """Tests copying a file to an object containing consecutive slashes."""
+ src_file = self.CreateTempFile(file_name='f0')
+ dst_bucket_uri = self.CreateBucket()
+ self.RunGsUtil(['cp', src_file, suri(dst_bucket_uri) + '//obj'])
+
+ lines = self.AssertNObjectsInBucket(dst_bucket_uri, 1)
+ self.assertEqual(suri(dst_bucket_uri) + '//obj', lines[0])
+
+ @PerformsFileToObjectUpload
+ def testCopyingObjsAndFilesToBucket(self):
+ """Tests copying objects and files to a bucket."""
+ src_bucket_uri = self.CreateBucket()
+ self.CreateObject(src_bucket_uri, object_name='f1', contents='foo')
+ src_dir = self.CreateTempDir(test_files=['f2'])
+ dst_bucket_uri = self.CreateBucket()
+ self.RunGsUtil(['cp', '-R', suri(src_bucket_uri, '**'),
+ '%s%s**' % (src_dir, os.sep), suri(dst_bucket_uri)])
+
+ lines = self.AssertNObjectsInBucket(dst_bucket_uri, 2)
+ self.assertEqual(suri(dst_bucket_uri, 'f1'), lines[0])
+ self.assertEqual(suri(dst_bucket_uri, 'f2'), lines[1])
+
+ @PerformsFileToObjectUpload
+ def testCopyingSubdirRecursiveToNonexistentSubdir(self):
+ """Tests copying a directory with a single file recursively to a
bucket.
+
+ The file should end up in a new bucket subdirectory with the file's
+ directory structure starting below the recursive copy point, as in
Unix cp.
+
+ Example:
+ filepath: dir1/dir2/foo
+ cp -r dir1 dir3
+ Results in dir3/dir2/foo being created.
+ """
+ src_dir = self.CreateTempDir()
+ self.CreateTempFile(tmpdir=src_dir + '/dir1/dir2', file_name='foo')
+ dst_bucket_uri = self.CreateBucket()
+ self.RunGsUtil(['cp', '-R', src_dir + '/dir1',
+ suri(dst_bucket_uri, 'dir3')])
+
+ lines = self.AssertNObjectsInBucket(dst_bucket_uri, 1)
+ self.assertEqual(suri(dst_bucket_uri, 'dir3/dir2/foo'), lines[0])
+
+ @PerformsFileToObjectUpload
+ def testCopyingWildcardedFilesToBucketSubDir(self):
+ """Tests copying wildcarded files to a bucket subdir."""
+ # Test with and without final slash on dest subdir.
+ for final_dst_char in ('', '/'):
+ dst_bucket_uri = self.CreateBucket()
+ self.CreateObject(dst_bucket_uri, object_name='subdir0/existing',
+ contents='foo')
+ self.CreateObject(dst_bucket_uri, object_name='subdir1/existing',
+ contents='foo')
+ src_dir = self.CreateTempDir(test_files=['f0', 'f1', 'f2'])
+
+ for i in range(2):
+ self.RunGsUtil(
+ ['cp', os.path.join(src_dir, 'f?'),
+ suri(dst_bucket_uri, 'subdir%d' % i) + final_dst_char])
+
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check1():
+ """Validate files were copied to the correct destinations."""
+ stdout = self.RunGsUtil(['ls', suri(dst_bucket_uri, 'subdir%d' % i, '**')],
+ return_stdout=True)
+ lines = stdout.split('\n')
+ self.assertEqual(5, len(lines))
+ self.assertEqual(suri(dst_bucket_uri, 'subdir%d' % i, 'existing'),
+ lines[0])
+ self.assertEqual(suri(dst_bucket_uri, 'subdir%d' % i, 'f0'), lines[1])
+ self.assertEqual(suri(dst_bucket_uri, 'subdir%d' % i, 'f1'), lines[2])
+ self.assertEqual(suri(dst_bucket_uri, 'subdir%d' % i, 'f2'), lines[3])
+ _Check1()
+
+ @PerformsFileToObjectUpload
+ def testCopyingOneNestedFileToBucketSubDir(self):
+ """Tests copying one nested file to a bucket subdir."""
+ # Test with and without final slash on dest subdir.
+ for final_dst_char in ('', '/'):
+
+ dst_bucket_uri = self.CreateBucket()
+ self.CreateObject(dst_bucket_uri, object_name='d0/placeholder',
+ contents='foo')
+ self.CreateObject(dst_bucket_uri, object_name='d1/placeholder',
+ contents='foo')
+
+ for i in range(2):
+ src_dir = self.CreateTempDir(test_files=[('d3', 'd4', 'nested', 'f1')])
+ self.RunGsUtil(['cp', '-r', suri(src_dir, 'd3'),
+ suri(dst_bucket_uri, 'd%d' % i) + final_dst_char])
+
+ lines = self.AssertNObjectsInBucket(dst_bucket_uri, 4)
+ self.assertEqual(suri(dst_bucket_uri, 'd0', 'd3', 'd4', 'nested', 'f1'),
+ lines[0])
+ self.assertEqual(suri(dst_bucket_uri, 'd0', 'placeholder'), lines[1])
+ self.assertEqual(suri(dst_bucket_uri, 'd1', 'd3', 'd4', 'nested', 'f1'),
+ lines[2])
+ self.assertEqual(suri(dst_bucket_uri, 'd1', 'placeholder'), lines[3])
+
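
These tests rely on the PerformsFileToObjectUpload decorator to cover both sequential and parallel composite upload paths. A minimal sketch of forcing the parallel composite path directly, using the SetBotoConfigForTest helper that appears later in this change (the class name, option name, and 1-byte threshold are assumptions for illustration, not part of this diff):

import gslib.tests.testcase as testcase
from gslib.tests.util import ObjectToURI as suri
from gslib.tests.util import SetBotoConfigForTest


class TestForcedParallelUpload(testcase.GsUtilIntegrationTestCase):
  """Hypothetical test forcing the parallel composite upload path."""

  def test_forced_parallel_upload(self):
    src_file = self.CreateTempFile(file_name='f0', contents='x' * 1024)
    dst_bucket_uri = self.CreateBucket()
    # Assumed [GSUtil] boto option; with a 1-byte threshold every upload is
    # split into components and composed server-side.
    with SetBotoConfigForTest(
        [('GSUtil', 'parallel_composite_upload_threshold', '1')]):
      self.RunGsUtil(['cp', src_file, suri(dst_bucket_uri)])
    self.AssertNObjectsInBucket(dst_bucket_uri, 1)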
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/test_rb.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Integration tests for rb command."""
+
+from __future__ import absolute_import
+
+import gslib.tests.testcase as testcase
+from gslib.tests.util import ObjectToURI as suri
+
+
+class TestRb(testcase.GsUtilIntegrationTestCase):
+ """Integration tests for rb command."""
+
+ def test_rb_bucket_works(self):
+ bucket_uri = self.CreateBucket()
+ self.RunGsUtil(['rb', suri(bucket_uri)])
+ stderr = self.RunGsUtil(
+ ['ls', '-Lb', 'gs://%s' % self.nonexistent_bucket_name],
+ return_stderr=True, expected_status=1)
+ self.assertIn('404', stderr)
+
+ def test_rb_bucket_not_empty(self):
+ bucket_uri = self.CreateBucket(test_objects=1)
+ stderr = self.RunGsUtil(['rb', suri(bucket_uri)], expected_status=1,
+ return_stderr=True)
+ self.assertIn('BucketNotEmpty', stderr)
+
+ def test_rb_versioned_bucket_not_empty(self):
+ bucket_uri = self.CreateVersionedBucket(test_objects=1)
+ stderr = self.RunGsUtil(['rb', suri(bucket_uri)], expected_status=1,
+ return_stderr=True)
+ self.assertIn('Bucket is not empty. Note: this is a versioned bucket',
+ stderr)
+
+ def test_rb_minus_f(self):
+ bucket_uri = self.CreateBucket()
+ stderr = self.RunGsUtil([
+ 'rb', '-f', 'gs://%s' % self.nonexistent_bucket_name,
+ suri(bucket_uri)], return_stderr=True, expected_status=1)
+ # There should be no error output, and existing bucket named after
+ # non-existent bucket should be gone.
+ self.assertNotIn('bucket does not exist.', stderr)
+ stderr = self.RunGsUtil(
+ ['ls', '-Lb', suri(bucket_uri)], return_stderr=True, expected_status=1)
+ self.assertIn('404', stderr)
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/test_rsync.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,807 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Integration tests for rsync command."""
+
+import os
+
+import crcmod
+
+import gslib.tests.testcase as testcase
+from gslib.tests.testcase.integration_testcase import SkipForS3
+from gslib.tests.util import ObjectToURI as suri
+from gslib.tests.util import PerformsFileToObjectUpload
+from gslib.tests.util import SetBotoConfigForTest
+from gslib.tests.util import unittest
+from gslib.util import IS_WINDOWS
+from gslib.util import Retry
+from gslib.util import UsingCrcmodExtension
+
+NO_CHANGES = 'Building synchronization state...\nStarting synchronization\n'
+
+
+def _TailSet(start_point, listing):
+ """Returns set of object name tails.
+
+ Tails can be compared between source and dest, past the point at which rsync
+ was done. For example if test ran rsync gs://bucket1/dir gs://bucket2/dir2,
+ the tails for listings from bucket1 would start after "dir", while the tails
+ for listings from bucket2 would start after "dir2".
+
+ Args:
+ start_point: The target of the rsync command, e.g., for the above command it
+ would be gs://bucket1/dir for the bucket1 listing results and
+ gs://bucket2/dir2 for the bucket2 listing results.
+ listing: The listing over which to compute tail.
+
+ Returns:
+ Object name tails.
+ """
+ return set(l[len(start_point):] for l in listing.strip().split('\n'))
+
+
+class TestRsync(testcase.GsUtilIntegrationTestCase):
+ """Integration tests for rsync command."""
+
+ @staticmethod
+ def _FlatListDir(directory):
+ """Perform a flat listing over directory.
+
+ Args:
+ directory: The directory to list
+
+ Returns:
+ Listings with path separators canonicalized to '/', to make assertions
+ easier for Linux vs Windows.
+ """
+ result = []
+ for dirpath, _, filenames in os.walk(directory):
+ for f in filenames:
+ result.append(os.path.join(dirpath, f))
+ return '\n'.join(result).replace('\\', '/')
+
+ def _FlatListBucket(self, bucket_uri):
+ """Perform a flat listing over bucket_uri."""
+ return self.RunGsUtil(['ls', suri(bucket_uri, '**')], return_stdout=True)
+
+ def test_invalid_args(self):
+ """Tests various invalid argument cases."""
+ bucket_uri = self.CreateBucket()
+ obj1 = self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
+ contents='obj1')
+ tmpdir = self.CreateTempDir()
+ # rsync object to bucket.
+ self.RunGsUtil(['rsync', suri(obj1), suri(bucket_uri)], expected_status=1)
+ # rsync bucket to object.
+ self.RunGsUtil(['rsync', suri(bucket_uri), suri(obj1)], expected_status=1)
+ # rsync bucket to non-existent bucket.
+ self.RunGsUtil(['rsync', suri(bucket_uri), self.nonexistent_bucket_name],
+ expected_status=1)
+ # rsync object to dir.
+ self.RunGsUtil(['rsync', suri(obj1), tmpdir], expected_status=1)
+ # rsync dir to object.
+ self.RunGsUtil(['rsync', tmpdir, suri(obj1)], expected_status=1)
+ # rsync dir to non-existent bucket.
+ self.RunGsUtil(['rsync', tmpdir, suri(obj1), self.nonexistent_bucket_name],
+ expected_status=1)
+
+ # Note: The tests below exercise the cases
+ # {src_dir, src_bucket} X {dst_dir, dst_bucket}. We use gsutil rsync -d for
+ # all the cases but then have just one test without -d (test_bucket_to_bucket)
+ # as representative of handling without the -d option. This provides
+ # reasonable test coverage because the -d handling is src/dest URI-type
+ # independent, and keeps the test case combinations more manageable.
+
+ def test_bucket_to_bucket(self):
+ """Tests that flat and recursive rsync between 2 buckets works
correctly."""
+ # Create 2 buckets with 1 overlapping object, 1 extra object at root
level
+ # in each, and 1 extra object 1 level down in each. Make the
overlapping
+ # objects named the same but with different content, to test that we
detect
+ # and properly copy in that case.
+ bucket1_uri = self.CreateBucket()
+ bucket2_uri = self.CreateBucket()
+ self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
+ contents='obj1')
+ self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
+ contents='obj2')
+ self.CreateObject(bucket_uri=bucket1_uri, object_name='subdir/obj3',
+ contents='subdir/obj3')
+ self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
+ contents='OBJ2')
+ self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
+ contents='obj4')
+ self.CreateObject(bucket_uri=bucket2_uri, object_name='subdir/obj5',
+ contents='subdir/obj5')
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check1():
+ """Tests rsync works as expected."""
+ self.RunGsUtil(['rsync', suri(bucket1_uri), suri(bucket2_uri)])
+ listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
+ listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
+ # First bucket should have un-altered content.
+ self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
+ # Second bucket should have new objects added from source bucket (without
+ # removing extraneous object found in dest bucket), and without the
+ # subdir objects synchronized.
+ self.assertEquals(listing2,
+ set(['/obj1', '/obj2', '/obj4', '/subdir/obj5']))
+ # Assert that the src/dest objects that had same length but different
+ # content were correctly synchronized (bucket to bucket sync uses
+ # checksums).
+ self.assertEquals('obj2', self.RunGsUtil(
+ ['cat', suri(bucket1_uri, 'obj2')], return_stdout=True))
+ self.assertEquals('obj2', self.RunGsUtil(
+ ['cat', suri(bucket2_uri, 'obj2')], return_stdout=True))
+ _Check1()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', suri(bucket1_uri), suri(bucket2_uri)], return_stderr=True))
+
+ # Now add and remove some objects in each bucket and test rsync -r.
+ self.CreateObject(bucket_uri=bucket1_uri, object_name='obj6',
+ contents='obj6')
+ self.CreateObject(bucket_uri=bucket2_uri, object_name='obj7',
+ contents='obj7')
+ self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
+ self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')])
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check2():
+ self.RunGsUtil(['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)])
+ listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
+ listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
+ # First bucket should have un-altered content.
+ self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
+ # Second bucket should have objects that were newly added to first bucket
+ # (without removing extraneous dest bucket objects), and without the
+ # subdir objects synchronized.
+ self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj4', '/obj6',
+ '/obj7', '/subdir/obj3',
+ '/subdir/obj5']))
+ _Check2()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)],
+ return_stderr=True))
+
+ def test_bucket_to_bucket_minus_d(self):
+ """Tests that flat and recursive rsync between 2 buckets works
correctly."""
+ # Create 2 buckets with 1 overlapping object, 1 extra object at root
level
+ # in each, and 1 extra object 1 level down in each. Make the
overlapping
+ # objects named the same but with different content, to test that we
detect
+ # and properly copy in that case.
+ bucket1_uri = self.CreateBucket()
+ bucket2_uri = self.CreateBucket()
+ self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
+ contents='obj1')
+ self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
+ contents='obj2')
+ self.CreateObject(bucket_uri=bucket1_uri, object_name='subdir/obj3',
+ contents='subdir/obj3')
+ self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
+ contents='OBJ2')
+ self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
+ contents='obj4')
+ self.CreateObject(bucket_uri=bucket2_uri, object_name='subdir/obj5',
+ contents='subdir/obj5')
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check1():
+ """Tests rsync works as expected."""
+ self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
+ listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
+ listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
+ # First bucket should have un-altered content.
+ self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
+ # Second bucket should have content like first bucket but without the
+ # subdir objects synchronized.
+ self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
+ # Assert that the src/dest objects that had same length but different
+ # content were correctly synchronized (bucket to bucket sync uses
+ # checksums).
+ self.assertEquals('obj2', self.RunGsUtil(
+ ['cat', suri(bucket1_uri, 'obj2')], return_stdout=True))
+ self.assertEquals('obj2', self.RunGsUtil(
+ ['cat', suri(bucket2_uri, 'obj2')], return_stdout=True))
+ _Check1()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
+ return_stderr=True))
+
+ # Now add and remove some objects in each bucket and test rsync -r.
+ self.CreateObject(bucket_uri=bucket1_uri, object_name='obj6',
+ contents='obj6')
+ self.CreateObject(bucket_uri=bucket2_uri, object_name='obj7',
+ contents='obj7')
+ self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
+ self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')])
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check2():
+ self.RunGsUtil(['rsync', '-d', '-r',
+ suri(bucket1_uri), suri(bucket2_uri)])
+ listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
+ listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
+ # First bucket should have un-altered content.
+ self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
+ # Second bucket should have content like first bucket but without the
+ # subdir objects synchronized.
+ self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
+ _Check2()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', '-r', suri(bucket1_uri), suri(bucket2_uri)],
+ return_stderr=True))
+
+ # Test sequential upload as well as parallel composite upload case.
+ @PerformsFileToObjectUpload
+ @unittest.skipUnless(UsingCrcmodExtension(crcmod),
+ 'Test requires fast crcmod.')
+ def test_dir_to_bucket_minus_d(self):
+ """Tests that flat and recursive rsync dir to bucket works
correctly."""
+ # Create dir and bucket with 1 overlapping object, 1 extra object at
root
+ # level in each, and 1 extra object 1 level down in each. Make the
+ # overlapping objects named the same but with different content, to
test
+ # that we detect and properly copy in that case.
+ tmpdir = self.CreateTempDir()
+ subdir = os.path.join(tmpdir, 'subdir')
+ os.mkdir(subdir)
+ bucket_uri = self.CreateBucket()
+ self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
+ self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
+ self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3')
+ self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
+ contents='OBJ2')
+ self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
+ contents='obj4')
+ self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj5',
+ contents='subdir/obj5')
+
+ # Need to make sure the bucket listing is caught-up, otherwise the
+ # first rsync may not see obj2 and overwrite it.
+ self.AssertNObjectsInBucket(bucket_uri, 3)
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check1():
+ """Tests rsync works as expected."""
+ self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
+ listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
+ listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
+ # Dir should have un-altered content.
+ self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
+ # Bucket should have content like dir but without the subdir objects
+ # synchronized.
+ self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
+ # Assert that the src/dest objects that had same length but different
+ # content were not synchronized (dir to bucket sync doesn't use checksums
+ # unless you specify -c).
+ with open(os.path.join(tmpdir, 'obj2')) as f:
+ self.assertEquals('obj2', '\n'.join(f.readlines()))
+ self.assertEquals('OBJ2', self.RunGsUtil(
+ ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
+ _Check1()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True))
+
+ # Now rerun the sync with the -c option.
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check2():
+ """Tests rsync -c works as expected."""
+ self.RunGsUtil(['rsync', '-d', '-c', tmpdir, suri(bucket_uri)])
+ listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
+ listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
+ # Dir should have un-altered content.
+ self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
+ # Bucket should have content like dir but without the subdir objects
+ # synchronized.
+ self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
+ # Assert that the src/dest objects that had same length but different
+ # content were synchronized (dir to bucket sync with -c uses checksums).
+ with open(os.path.join(tmpdir, 'obj2')) as f:
+ self.assertEquals('obj2', '\n'.join(f.readlines()))
+ self.assertEquals('obj2', self.RunGsUtil(
+ ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
+ _Check2()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', '-c', tmpdir, suri(bucket_uri)], return_stderr=True))
+
+ # Now add and remove some objects in dir and bucket and test rsync -r.
+ self.CreateTempFile(tmpdir=tmpdir, file_name='obj6', contents='obj6')
+ self.CreateObject(bucket_uri=bucket_uri, object_name='obj7',
+ contents='obj7')
+ os.unlink(os.path.join(tmpdir, 'obj1'))
+ self.RunGsUtil(['rm', suri(bucket_uri, 'obj2')])
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check3():
+ self.RunGsUtil(['rsync', '-d', '-r', tmpdir, suri(bucket_uri)])
+ listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
+ listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
+ # Dir should have un-altered content.
+ self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
+ # Bucket should have content like dir but without the subdir objects
+ # synchronized.
+ self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
+ _Check3()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', '-r', tmpdir, suri(bucket_uri)], return_stderr=True))
+
+ @unittest.skipUnless(UsingCrcmodExtension(crcmod),
+ 'Test requires fast crcmod.')
+ def test_dir_to_dir_minus_d(self):
+ """Tests that flat and recursive rsync dir to dir works correctly."""
+ # Create 2 dirs with 1 overlapping file, 1 extra file at root
+ # level in each, and 1 extra file 1 level down in each. Make the
+ # overlapping files named the same but with different content, to test
+ # that we detect and properly copy in that case.
+ tmpdir1 = self.CreateTempDir()
+ tmpdir2 = self.CreateTempDir()
+ subdir1 = os.path.join(tmpdir1, 'subdir1')
+ subdir2 = os.path.join(tmpdir2, 'subdir2')
+ os.mkdir(subdir1)
+ os.mkdir(subdir2)
+ self.CreateTempFile(tmpdir=tmpdir1, file_name='obj1', contents='obj1')
+ self.CreateTempFile(tmpdir=tmpdir1, file_name='obj2', contents='obj2')
+ self.CreateTempFile(
+ tmpdir=subdir1, file_name='obj3', contents='subdir1/obj3')
+ self.CreateTempFile(tmpdir=tmpdir2, file_name='obj2', contents='OBJ2')
+ self.CreateTempFile(tmpdir=tmpdir2, file_name='obj4', contents='obj4')
+ self.CreateTempFile(
+ tmpdir=subdir2, file_name='obj5', contents='subdir2/obj5')
+
+ self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
+ listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
+ listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
+ # dir1 should have un-altered content.
+ self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3']))
+ # dir2 should have content like dir1 but without the subdir1 objects
+ # synchronized.
+ self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5']))
+ # Assert that the src/dest objects that had same length but different
+ # checksums were not synchronized (dir to dir sync doesn't use checksums
+ # unless you specify -c).
+ with open(os.path.join(tmpdir1, 'obj2')) as f:
+ self.assertEquals('obj2', '\n'.join(f.readlines()))
+ with open(os.path.join(tmpdir2, 'obj2')) as f:
+ self.assertEquals('OBJ2', '\n'.join(f.readlines()))
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))
+
+ # Now rerun the sync with the -c option.
+ self.RunGsUtil(['rsync', '-d', '-c', tmpdir1, tmpdir2])
+ listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
+ listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
+ # dir1 should have un-altered content.
+ self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3']))
+ # dir2 should have content like dir but without the subdir objects
+ # synchronized.
+ self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5']))
+ # Assert that the src/dest objects that had same length but different
+ # content were synchronized (dir to dir sync with -c uses checksums).
+ with open(os.path.join(tmpdir1, 'obj2')) as f:
+ self.assertEquals('obj2', '\n'.join(f.readlines()))
+ with open(os.path.join(tmpdir2, 'obj2')) as f:
+ self.assertEquals('obj2', '\n'.join(f.readlines()))
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', '-c', tmpdir1, tmpdir2], return_stderr=True))
+
+ # Now add and remove some objects in both dirs and test rsync -r.
+ self.CreateTempFile(tmpdir=tmpdir1, file_name='obj6', contents='obj6')
+ self.CreateTempFile(tmpdir=tmpdir2, file_name='obj7', contents='obj7')
+ os.unlink(os.path.join(tmpdir1, 'obj1'))
+ os.unlink(os.path.join(tmpdir2, 'obj2'))
+
+ self.RunGsUtil(['rsync', '-d', '-r', tmpdir1, tmpdir2])
+ listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
+ listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
+ # dir1 should have un-altered content.
+ self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir1/obj3']))
+ # dir2 should have content like dir but without the subdir objects
+ # synchronized.
+ self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir1/obj3']))
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', '-r', tmpdir1, tmpdir2], return_stderr=True))
+
+ def test_dir_to_dir_minus_d_more_files_than_bufsize(self):
+ """Tests concurrently building listing from multiple tmp file
ranges."""
+ # Create 2 dirs, where each dir has 1000 objects and differing names.
+ tmpdir1 = self.CreateTempDir()
+ tmpdir2 = self.CreateTempDir()
+ for i in range(0, 1000):
+ self.CreateTempFile(tmpdir=tmpdir1, file_name='d1-%s' % i, contents='x')
+ self.CreateTempFile(tmpdir=tmpdir2, file_name='d2-%s' % i, contents='y')
+
+ # We open a new temp file each time we reach rsync_buffer_lines of
+ # listing output. On Windows, this will result in a 'too many open file
+ # handles' error, so choose a larger value so as not to open so many files.
+ rsync_buffer_config = [('GSUtil', 'rsync_buffer_lines',
+ '50' if IS_WINDOWS else '2')]
+ # Run gsutil with config option to make buffer size << # files.
+ with SetBotoConfigForTest(rsync_buffer_config):
+ self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
+ listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
+ listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
+ self.assertEquals(listing1, listing2)
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))
+
+ @unittest.skipUnless(UsingCrcmodExtension(crcmod),
+ 'Test requires fast crcmod.')
+ def test_bucket_to_dir_minus_d(self):
+ """Tests that flat and recursive rsync bucket to dir works
correctly."""
+ # Create bucket and dir with 1 overlapping object, 1 extra object at
root
+ # level in each, and 1 extra object 1 level down in each. Make the
+ # overlapping objects named the same but with different content, to
test
+ # that we detect and properly copy in that case.
+ bucket_uri = self.CreateBucket()
+ tmpdir = self.CreateTempDir()
+ subdir = os.path.join(tmpdir, 'subdir')
+ os.mkdir(subdir)
+ self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
+ contents='obj1')
+ self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
+ contents='obj2')
+ self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj3',
+ contents='subdir/obj3')
+ self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='OBJ2')
+ self.CreateTempFile(tmpdir=tmpdir, file_name='obj4', contents='obj4')
+ self.CreateTempFile(tmpdir=subdir, file_name='obj5', contents='subdir/obj5')
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check1():
+ """Tests rsync works as expected."""
+ self.RunGsUtil(['rsync', '-d', suri(bucket_uri), tmpdir])
+ listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
+ listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
+ # Bucket should have un-altered content.
+ self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
+ # Dir should have content like bucket but without the subdir objects
+ # synchronized.
+ self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
+ # Assert that the src/dest objects that had same length but different
+ # content were not synchronized (bucket to dir sync doesn't use checksums
+ # unless you specify -c).
+ self.assertEquals('obj2', self.RunGsUtil(
+ ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
+ with open(os.path.join(tmpdir, 'obj2')) as f:
+ self.assertEquals('OBJ2', '\n'.join(f.readlines()))
+ _Check1()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', suri(bucket_uri), tmpdir], return_stderr=True))
+
+ # Now rerun the sync with the -c option.
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check2():
+ """Tests rsync -c works as expected."""
+ self.RunGsUtil(['rsync', '-d', '-c', suri(bucket_uri), tmpdir])
+ listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
+ listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
+ # Bucket should have un-altered content.
+ self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
+ # Dir should have content like bucket but without the subdir objects
+ # synchronized.
+ self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
+ # Assert that the src/dest objects that had same length but different
+ # content were synchronized (bucket to dir sync with -c uses checksums).
+ self.assertEquals('obj2', self.RunGsUtil(
+ ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
+ with open(os.path.join(tmpdir, 'obj2')) as f:
+ self.assertEquals('obj2', '\n'.join(f.readlines()))
+ _Check2()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', '-c', suri(bucket_uri), tmpdir], return_stderr=True))
+
+ # Now add and remove some objects in dir and bucket and test rsync -r.
+ self.CreateObject(bucket_uri=bucket_uri, object_name='obj6',
+ contents='obj6')
+ self.CreateTempFile(tmpdir=tmpdir, file_name='obj7', contents='obj7')
+ self.RunGsUtil(['rm', suri(bucket_uri, 'obj1')])
+ os.unlink(os.path.join(tmpdir, 'obj2'))
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check3():
+ self.RunGsUtil(['rsync', '-d', '-r', suri(bucket_uri), tmpdir])
+ listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
+ listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
+ # Bucket should have un-altered content.
+ self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
+ # Dir should have content like bucket but without the subdir objects
+ # synchronized.
+ self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
+ _Check3()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True))
+
+ def test_bucket_to_dir_minus_d_with_fname_case_change(self):
+ """Tests that name case changes work correctly.
+
+ Example:
+
+ Windows filenames are case-preserving in what you wrote, but case-
+ insensitive when compared. If you synchronize from FS to cloud and then
+ change case-naming in local files, you could end up with this situation:
+
+ Cloud copy is called .../TiVo/...
+ FS copy is called .../Tivo/...
+
+ Then, if you sync from cloud to FS, if rsync doesn't recognize that on
+ Windows these names are identical, each rsync run will cause both a copy
+ and a delete to be executed.
+ """
+ # Create bucket and dir with same objects, but dir copy has different name
+ # case.
+ bucket_uri = self.CreateBucket()
+ tmpdir = self.CreateTempDir()
+ self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
+ contents='obj1')
+ self.CreateTempFile(tmpdir=tmpdir, file_name='Obj1', contents='obj1')
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check1():
+ """Tests rsync works as expected."""
+ output = self.RunGsUtil(
+ ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
+ # Nothing should be copied or removed under Windows.
+ if IS_WINDOWS:
+ self.assertEquals(NO_CHANGES, output)
+ else:
+ self.assertNotEquals(NO_CHANGES, output)
+ _Check1()
+
+ def test_bucket_to_dir_minus_d_with_leftover_dir_placeholder(self):
+ """Tests that we correctly handle leftover dir placeholders.
+
+ See comments in gslib.commands.rsync._FieldedListingIterator for details.
+ """
+ bucket_uri = self.CreateBucket()
+ tmpdir = self.CreateTempDir()
+ self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
+ contents='obj1')
+ # Create a placeholder like what can be left over by web GUI tools.
+ key_uri = bucket_uri.clone_replace_name('/')
+ key_uri.set_contents_from_string('')
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check1():
+ """Tests rsync works as expected."""
+ output = self.RunGsUtil(
+ ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
+ listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
+ listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
+ # Bucket should have un-altered content.
+ self.assertEquals(listing1, set(['/obj1', '//']))
+ # Dir should not have the placeholder object.
+ self.assertEquals(listing2, set(['/obj1']))
+ # Stdout should report what happened.
+ self.assertRegexpMatches(output, r'.*Skipping cloud sub-directory.*')
+ _Check1()
+
+ @unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.')
+ def test_rsync_minus_d_minus_e(self):
+ """Tests that rsync -e ignores symlinks."""
+ tmpdir = self.CreateTempDir()
+ subdir = os.path.join(tmpdir, 'subdir')
+ os.mkdir(subdir)
+ bucket_uri = self.CreateBucket()
+ fpath1 = self.CreateTempFile(
+ tmpdir=tmpdir, file_name='obj1', contents='obj1')
+ self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
+ self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3')
+ good_symlink_path = os.path.join(tmpdir, 'symlink1')
+ os.symlink(fpath1, good_symlink_path)
+ # Make a symlink that points to a non-existent path to test that -e also
+ # handles that case.
+ bad_symlink_path = os.path.join(tmpdir, 'symlink2')
+ os.symlink(os.path.join('/', 'non-existent'), bad_symlink_path)
+ self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
+ contents='OBJ2')
+ self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
+ contents='obj4')
+ self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj5',
+ contents='subdir/obj5')
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check1():
+ """Ensure listings match the commented expectations."""
+ self.RunGsUtil(['rsync', '-d', '-e', tmpdir, suri(bucket_uri)])
+ listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
+ listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
+ # Dir should have un-altered content.
+ self.assertEquals(
+ listing1,
+ set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1', '/symlink2']))
+ # Bucket should have content like dir but without the symlink, and
+ # without subdir objects synchronized.
+ self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
+ _Check1()
+
+ # Now remove invalid symlink and run without -e, and see that symlink gets
+ # copied (as file to which it points). Use @Retry as hedge against bucket
+ # listing eventual consistency.
+ os.unlink(bad_symlink_path)
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check2():
+ """Tests rsync works as expected."""
+ self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
+ listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
+ listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
+ # Dir should have un-altered content.
+ self.assertEquals(
+ listing1, set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1']))
+ # Bucket should have content like dir but without the symlink, and
+ # without subdir objects synchronized.
+ self.assertEquals(
+ listing2, set(['/obj1', '/obj2', '/subdir/obj5', '/symlink1']))
+ self.assertEquals('obj1', self.RunGsUtil(
+ ['cat', suri(bucket_uri, 'symlink1')], return_stdout=True))
+ _Check2()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True))
+
+ @SkipForS3('S3 does not support composite objects')
+ def test_bucket_to_bucket_minus_d_with_composites(self):
+ """Tests that rsync works with composite objects (which don't have
MD5s)."""
+ bucket1_uri = self.CreateBucket()
+ bucket2_uri = self.CreateBucket()
+ self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
+ contents='obj1')
+ self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
+ contents='obj2')
+ self.RunGsUtil(
+ ['compose', suri(bucket1_uri, 'obj1'), suri(bucket1_uri, 'obj2'),
+ suri(bucket1_uri, 'obj3')])
+ self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
+ contents='OBJ2')
+ self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
+ contents='obj4')
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check():
+ self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
+ listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
+ listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
+ # First bucket should have un-altered content.
+ self.assertEquals(listing1, set(['/obj1', '/obj2', '/obj3']))
+ # Second bucket should have content like first bucket but without the
+ # subdir objects synchronized.
+ self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj3']))
+ _Check()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
+ return_stderr=True))
+
+ def test_bucket_to_bucket_minus_d_empty_dest(self):
+ """Tests working with empty dest bucket (iter runs out before src
iter)."""
+ bucket1_uri = self.CreateBucket()
+ bucket2_uri = self.CreateBucket()
+ self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
+ contents='obj1')
+ self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
+ contents='obj2')
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check():
+ self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
+ listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
+ listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
+ self.assertEquals(listing1, set(['/obj1', '/obj2']))
+ self.assertEquals(listing2, set(['/obj1', '/obj2']))
+ _Check()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
+ return_stderr=True))
+
+ def test_bucket_to_bucket_minus_d_empty_src(self):
+ """Tests working with empty src bucket (iter runs out before dst
iter)."""
+ bucket1_uri = self.CreateBucket()
+ bucket2_uri = self.CreateBucket()
+ self.CreateObject(bucket_uri=bucket2_uri, object_name='obj1',
+ contents='obj1')
+ self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
+ contents='obj2')
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check():
+ self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
+ stderr = self.RunGsUtil(['ls', suri(bucket1_uri, '**')],
+ expected_status=1, return_stderr=True)
+ self.assertIn('One or more URLs matched no objects', stderr)
+ stderr = self.RunGsUtil(['ls', suri(bucket2_uri, '**')],
+ expected_status=1, return_stderr=True)
+ self.assertIn('One or more URLs matched no objects', stderr)
+ _Check()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
+ return_stderr=True))
+
+ def test_rsync_minus_d_minus_p(self):
+ """Tests that rsync -p preserves ACLs."""
+ bucket1_uri = self.CreateBucket()
+ bucket2_uri = self.CreateBucket()
+ self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
+ contents='obj1')
+ # Set public-read (non-default) ACL so we can verify that rsync -p works.
+ self.RunGsUtil(['acl', 'set', 'public-read', suri(bucket1_uri, 'obj1')])
+
+ # Use @Retry as hedge against bucket listing eventual consistency.
+ @Retry(AssertionError, tries=3, timeout_secs=1)
+ def _Check():
+ """Tests rsync -p works as expected."""
+ self.RunGsUtil(['rsync', '-d', '-p', suri(bucket1_uri),
+ suri(bucket2_uri)])
+ listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
+ listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
+ self.assertEquals(listing1, set(['/obj1']))
+ self.assertEquals(listing2, set(['/obj1']))
+ acl1_json = self.RunGsUtil(['acl', 'get', suri(bucket1_uri, 'obj1')],
+ return_stdout=True)
+ acl2_json = self.RunGsUtil(['acl', 'get', suri(bucket2_uri, 'obj1')],
+ return_stdout=True)
+ self.assertEquals(acl1_json, acl2_json)
+ _Check()
+
+ # Check that re-running the same rsync command causes no more changes.
+ self.assertEquals(NO_CHANGES, self.RunGsUtil(
+ ['rsync', '-d', '-p', suri(bucket1_uri), suri(bucket2_uri)],
+ return_stderr=True))
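
For a concrete sense of the _TailSet helper defined at the top of this file, a tiny worked example (the bucket and object names are made up):

listing = ('gs://bucket1/dir/obj1\n'
           'gs://bucket1/dir/subdir/obj3\n')
# Stripping the rsync target prefix leaves only the comparable name tails.
assert _TailSet('gs://bucket1/dir', listing) == set(['/obj1', '/subdir/obj3'])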
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/test_rsync_funcs.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Unit tests for functions in rsync command."""
+
+import logging
+import os
+
+from gslib.commands.rsync import _ComputeNeededFileChecksums
+from gslib.commands.rsync import _NA
+from gslib.hashing_helper import CalculateB64EncodedCrc32cFromContents
+from gslib.hashing_helper import CalculateB64EncodedMd5FromContents
+from gslib.tests.testcase.unit_testcase import GsUtilUnitTestCase
+
+
+class TestRsyncFuncs(GsUtilUnitTestCase):
+
+ def test_compute_needed_file_checksums(self):
+ """Tests that we compute all/only needed file checksums."""
+ size = 4
+ logger = logging.getLogger()
+ tmpdir = self.CreateTempDir()
+ file_url_str = 'file://%s' % os.path.join(tmpdir, 'obj1')
+ self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
+ cloud_url_str = 'gs://whatever'
+ with open(os.path.join(tmpdir, 'obj1'), 'rb') as fp:
+ crc32c = CalculateB64EncodedCrc32cFromContents(fp)
+ fp.seek(0)
+ md5 = CalculateB64EncodedMd5FromContents(fp)
+
+ # Test case where source is a file and dest has CRC32C.
+ (src_crc32c, src_md5, dst_crc32c, dst_md5) = _ComputeNeededFileChecksums(
+ logger, file_url_str, size, _NA, _NA, cloud_url_str, size, crc32c, _NA)
+ self.assertEquals(crc32c, src_crc32c)
+ self.assertEquals(_NA, src_md5)
+ self.assertEquals(crc32c, dst_crc32c)
+ self.assertEquals(_NA, dst_md5)
+
+ # Test case where source is a file and dest has MD5 but not CRC32C.
+ (src_crc32c, src_md5, dst_crc32c, dst_md5) = _ComputeNeededFileChecksums(
+ logger, file_url_str, size, _NA, _NA, cloud_url_str, size, _NA, md5)
+ self.assertEquals(_NA, src_crc32c)
+ self.assertEquals(md5, src_md5)
+ self.assertEquals(_NA, dst_crc32c)
+ self.assertEquals(md5, dst_md5)
+
+ # Test case where dest is a file and src has CRC32C.
+ (src_crc32c, src_md5, dst_crc32c, dst_md5) = _ComputeNeededFileChecksums(
+ logger, cloud_url_str, size, crc32c, _NA, file_url_str, size, _NA, _NA)
+ self.assertEquals(crc32c, dst_crc32c)
+ self.assertEquals(_NA, src_md5)
+ self.assertEquals(crc32c, src_crc32c)
+ self.assertEquals(_NA, dst_md5)
+
+ # Test case where dest is a file and src has MD5 but not CRC32C.
+ (src_crc32c, src_md5, dst_crc32c, dst_md5) = _ComputeNeededFileChecksums(
+ logger, cloud_url_str, size, _NA, md5, file_url_str, size, _NA, _NA)
+ self.assertEquals(_NA, dst_crc32c)
+ self.assertEquals(md5, src_md5)
+ self.assertEquals(_NA, src_crc32c)
+ self.assertEquals(md5, dst_md5)
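
Aside, not part of the diff: the hashing helpers imported by this test can also
be used on their own to produce the base64-encoded digests that rsync compares.
A minimal sketch, assuming an arbitrary local file path:

    from gslib.hashing_helper import CalculateB64EncodedCrc32cFromContents
    from gslib.hashing_helper import CalculateB64EncodedMd5FromContents

    # Compute both digests from one file object, rewinding between reads
    # just as the unit test above does.
    with open('/tmp/example.txt', 'rb') as fp:
        crc32c = CalculateB64EncodedCrc32cFromContents(fp)
        fp.seek(0)
        md5 = CalculateB64EncodedMd5FromContents(fp)
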
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/tests/test_signurl.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,202 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for signurl command."""
+from datetime import timedelta
+import pkgutil
+
+import gslib.commands.signurl
+from gslib.commands.signurl import HAVE_OPENSSL
+from gslib.exception import CommandException
+import gslib.tests.testcase as testcase
+from gslib.tests.testcase.integration_testcase import SkipForS3
+from gslib.tests.util import ObjectToURI as suri
+from gslib.tests.util import unittest
+
+
+# pylint: disable=protected-access
+@unittest.skipUnless(HAVE_OPENSSL, 'signurl requires pyopenssl.')
+@SkipForS3('Signed URLs are only supported for gs:// URLs.')
+class TestSignUrl(testcase.GsUtilIntegrationTestCase):
+ """Integration tests for signurl command."""
+
+ def _GetKsFile(self):
+ if not hasattr(self, 'ks_file'):
+ # Dummy pkcs12 keystore generated with the command
+
+ # openssl req -new -passout pass:notasecret -batch \
+ # -x509 -keyout signed_url_test.key -out signed_url_test.pem \
+ # -subj '/CN=test.apps.googleusercontent.com'
+
+ # &&
+
+ # openssl pkcs12 -export -passin pass:notasecret \
+ # -passout pass:notasecret -inkey signed_url_test.key \
+ # -in signed_url_test.pem -out test.p12
+
+ # &&
+
+ # rm signed_url_test.key signed_url_test.pem
+ contents = pkgutil.get_data('gslib', 'tests/test_data/test.p12')
+ self.ks_file = self.CreateTempFile(contents=contents)
+ return self.ks_file
+
+ def testSignUrlOutput(self):
+ """Tests signurl output of a sample object."""
+
+ object_url = self.CreateObject(contents='z')
+ stdout = self.RunGsUtil(['signurl', '-p', 'notasecret',
+ self._GetKsFile(), suri(object_url)],
+ return_stdout=True)
+
+ self.assertIn(object_url.uri, stdout)
+ self.assertIn('te...@developer.gserviceaccount.com', stdout)
+ self.assertIn('Expires=', stdout)
+ self.assertIn('\tGET\t', stdout)
+
+ stdout = self.RunGsUtil(['signurl', '-m', 'PUT', '-p',
+ 'notasecret', self._GetKsFile(),
+ 'gs://test/test.txt'], return_stdout=True)
+
+ self.assertIn('te...@developer.gserviceaccount.com', stdout)
+ self.assertIn('Expires=', stdout)
+ self.assertIn('\tPUT\t', stdout)
+
+ def testSignUrlWithWildcard(self):
+ objs = ['test1', 'test2', 'test3']
+ bucket = self.CreateBucket()
+ obj_urls = []
+
+ for obj_name in objs:
+ obj_urls.append(self.CreateObject(bucket_uri=bucket,
+ object_name=obj_name, contents=''))
+
+ stdout = self.RunGsUtil(['signurl', '-p',
+ 'notasecret', self._GetKsFile(),
+ suri(bucket) + '/*'], return_stdout=True)
+
+ # Header, 3 signed urls, trailing newline
+ self.assertEquals(len(stdout.split('\n')), 5)
+
+ for obj_url in obj_urls:
+ self.assertIn(suri(obj_url), stdout)
+
+ def testSignUrlOfNonObjectUrl(self):
+ """Tests the signurl output of a non-existent file."""
+ self.RunGsUtil(['signurl', self._GetKsFile(), 'gs://'],
+ expected_status=1, stdin='notasecret')
+ self.RunGsUtil(['signurl', 'file://tmp/abc'], expected_status=1)
+
+
+@unittest.skipUnless(HAVE_OPENSSL, 'signurl requires pyopenssl.')
+class UnitTestSignUrl(testcase.GsUtilUnitTestCase):
+ """Unit tests for the signurl command."""
+
+ def setUp(self):
+ super(UnitTestSignUrl, self).setUp()
+ self.ks_contents = pkgutil.get_data('gslib', 'tests/test_data/test.p12')
+
+ def testDurationSpec(self):
+ tests = [('1h', timedelta(hours=1)),
+ ('2d', timedelta(days=2)),
+ ('5D', timedelta(days=5)),
+ ('35s', timedelta(seconds=35)),
+ ('1h', timedelta(hours=1)),
+ ('33', timedelta(hours=33)),
+ ('22m', timedelta(minutes=22)),
+ ('3.7', None),
+ ('27Z', None),
+ ]
+
+ for inp, expected in tests:
+ try:
+ td = gslib.commands.signurl._DurationToTimeDelta(inp)
+ self.assertEquals(td, expected)
+ except CommandException:
+ if expected is not None:
+ self.fail('{0} failed to parse'.format(inp))
+
+ def testSignPut(self):
+ """Tests the return value of the _GenSignedUrl function with \
+ a PUT method."""
+
+ expected = ('https://storage.googleapis.com/test/test.txt?'
+ 'GoogleAccessId=te...@developer.gserviceaccount.com'
+ '&Expires=1391816302&Signature=A6QbgTA8cXZCtjy2xCr401bdi0e'
+ '7zChTBQ6BX61L7AfytTGEQDMD%2BbvOQKjX7%2FsEh77cmzcSxOEKqTLUD'
+ 'bbkPgPqW3j8sGPSRX9VM58bgj1vt9yU8cRKoegFHXAqsATx2G5rc%2FvEl'
+ 'iFp9UWMfVj5TaukqlBAVuzZWlyx0aQa9tCKXRtC9YcxORxG41RfiowA2kd8'
+ 'XBTQt4M9XTzpVyr5rVMzfr2LvtGf9UAJvlt8p6T6nThl2vy9%2FwBoPcMFa'
+ 'OWQcGTagwjyKWDcI1vQPIFQLGftAcv3QnGZxZTtg8pZW%2FIxRJrBhfFfcA'
+ 'c62hDKyaU2YssSMy%2FjUJynWx3TIiJjhg%3D%3D')
+
+ expiration = 1391816302
+ ks, client_id = (gslib.commands.signurl
+ ._ReadKeystore(self.ks_contents, 'notasecret'))
+ signed_url = (gslib.commands.signurl
+ ._GenSignedUrl(ks.get_privatekey(),
+ client_id, 'PUT', '',
+ '', expiration, 'test/test.txt'))
+ self.assertEquals(expected, signed_url)
+
+ def testSignurlPutContentype(self):
+ """Tests the return value of the _GenSignedUrl function with \
+ a PUT method and specified content type."""
+
+ expected = ('https://storage.googleapis.com/test/test.txt?'
+ 'GoogleAccessId=te...@developer.gserviceaccount.com&'
+ 'Expires=1391816302&Signature=APn%2BCCVcQrfc1fKQXrs'
+ 'PEZFj9%2FmASO%2BolR8xwgBY6PbWMkcCtrUVFBauP6t4NxqZO'
+ 'UnbOFYTZYzul0RC57ZkEWJp3VcyDIHcn6usEE%2FTzUHhbDCDW'
+ 'awAkZS7p8kO8IIACuJlF5s9xZmZzaEBtzF0%2BBOsGgBPBlg2y'
+ 'zrhFB6cyyAwNiUgmhLQaVkdobnSwtI5QJkvXoIjJb6hhLiVbLC'
+ 'rWdgSZVusjAKGlWCJsM%2B4TkCR%2Bi8AnrkECngcMHuJ9mYbS'
+ 'XI1VfEmcnRVcfkKkJGZGctaDIWK%2FMTEmfYCW6USt3Zk2WowJ'
+ 'SGuJHqEcFz0kyfAlkpmG%2Fl5E1FQROYqLN2kZQ%3D%3D')
+
+ expiration = 1391816302
+ ks, client_id = (gslib.commands.signurl
+ ._ReadKeystore(self.ks_contents,
+ 'notasecret'))
+ signed_url = (gslib.commands.signurl
+ ._GenSignedUrl(ks.get_privatekey(),
+ client_id, 'PUT', '',
+ 'text/plain', expiration,
+ 'test/test.txt'))
+ self.assertEquals(expected, signed_url)
+
+ def testSignurlGet(self):
+ """Tests the return value of the _GenSignedUrl function with \
+ a GET method."""
+
+ expected = ('https://storage.googleapis.com/test/test.txt?'
+ 'GoogleAccessId=te...@developer.gserviceaccount.com&'
+ 'Expires=0&Signature=TCZwe32cU%2BMksmLiSY9shHXQjLs1'
+ 'F3y%2F%2F1M0UhiK4qsPRVNZVwI7YWvv2qa2Xa%2BVBBafboF0'
+ '1%2BWvx3ZG316pwpNIRR6y7jNnE0LvQmHE8afbm2VYCi%2B2JS'
+ 'ZK2YZFJAyEek8si53jhYQEmaRq1zPfGbX84B2FJ8v4iI%2FTC1'
+ 'I9OE5vHF0sWwIR9d73JDrFLjaync7QYFWRExdwvqlQX%2BPO3r'
+ 'OG9Ns%2BcQFIN7npnsVjH28yNY9gBzXya8LYmNvUx6bWHWZMiu'
+ 'fLwDZ0jejNeDZTOfQGRM%2B0vY7NslzaT06W1wo8P7McSkAZEl'
+ 'DCbhR0Vo1fturPMwmAhi88f0qzRzywbg%3D%3D')
+
+ expiration = 0
+ ks, client_id = (gslib.commands.signurl
+ ._ReadKeystore(self.ks_contents,
+ 'notasecret'))
+ signed_url = (gslib.commands.signurl
+ ._GenSignedUrl(ks.get_privatekey(),
+ client_id, 'GET', '',
+ '', expiration, 'test/test.txt'))
+ self.assertEquals(expected, signed_url)
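
Aside, not part of the diff: the private helpers exercised by these unit tests
compose as follows. A rough sketch, assuming a local test.p12 keystore with
password 'notasecret' and pyopenssl installed; the expiration value is the same
one the tests use:

    import gslib.commands.signurl as signurl

    # Duration shorthand: '1h', '2d', '35s'; a bare number is taken as hours.
    duration = signurl._DurationToTimeDelta('1h')

    ks_contents = open('test.p12', 'rb').read()
    ks, client_id = signurl._ReadKeystore(ks_contents, 'notasecret')
    signed_url = signurl._GenSignedUrl(ks.get_privatekey(), client_id,
                                       'GET', '', '', 1391816302,
                                       'test/test.txt')
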
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/third_party/protorpc/__init__.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+#
+# Copyright 2011 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Main module for ProtoRPC package."""
+
+__author__ = 'ra...@google.com (Rafe Kaplan)'
+
+import logging
+import new
+import os
+import sys
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/third_party/protorpc/message_types.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+#
+# Copyright 2010 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Simple protocol message types.
+
+Includes new message and field types that are outside what is defined by the
+protocol buffers standard.
+"""
+
+__author__ = 'ra...@google.com (Rafe Kaplan)'
+
+import datetime
+
+from gslib.third_party.protorpc import messages
+from gslib.third_party.protorpc import util
+
+__all__ = [
+ 'DateTimeField',
+ 'DateTimeMessage',
+ 'VoidMessage',
+]
+
+class VoidMessage(messages.Message):
+ """Empty message."""
+
+
+class DateTimeMessage(messages.Message):
+ """Message to store/transmit a DateTime.
+
+ Fields:
+ milliseconds: Milliseconds since Jan 1st 1970 local time.
+ time_zone_offset: Optional time zone offset, in minutes from UTC.
+ """
+ milliseconds = messages.IntegerField(1, required=True)
+ time_zone_offset = messages.IntegerField(2)
+
+
+class DateTimeField(messages.MessageField):
+ """Field definition for datetime values.
+
+ Stores a python datetime object as a field. If time zone information is
+ included in the datetime object, it will be included in
+ the encoded data when this is encoded/decoded.
+ """
+
+ type = datetime.datetime
+
+ message_type = DateTimeMessage
+
+ @util.positional(3)
+ def __init__(self,
+ number,
+ **kwargs):
+ super(DateTimeField, self).__init__(self.message_type,
+ number,
+ **kwargs)
+
+ def value_from_message(self, message):
+ """Convert DateTimeMessage to a datetime.
+
+ Args:
+ A DateTimeMessage instance.
+
+ Returns:
+ A datetime instance.
+ """
+ message = super(DateTimeField, self).value_from_message(message)
+ if message.time_zone_offset is None:
+ return datetime.datetime.utcfromtimestamp(message.milliseconds / 1000.0)
+
+ # Need to subtract the time zone offset, because when we call
+ # datetime.fromtimestamp, it will add the time zone offset to the
+ # value we pass.
+ milliseconds = (message.milliseconds -
+ 60000 * message.time_zone_offset)
+
+ timezone = util.TimeZoneOffset(message.time_zone_offset)
+ return datetime.datetime.fromtimestamp(milliseconds / 1000.0,
+ tz=timezone)
+
+ def value_to_message(self, value):
+ value = super(DateTimeField, self).value_to_message(value)
+ # First, determine the delta from the epoch, so we can fill in
+ # DateTimeMessage's milliseconds field.
+ if value.tzinfo is None:
+ time_zone_offset = 0
+ local_epoch = datetime.datetime.utcfromtimestamp(0)
+ else:
+ time_zone_offset = util.timedelta_totalseconds(
+ value.tzinfo.utcoffset(value))
+ # Determine Jan 1, 1970 local time.
+ local_epoch = datetime.datetime.fromtimestamp(-time_zone_offset,
+ tz=value.tzinfo)
+ delta = value - local_epoch
+
+ # Create and fill in the DateTimeMessage, including time zone if
+ # one was specified.
+ message = DateTimeMessage()
+ message.milliseconds = int(util.timedelta_totalseconds(delta) * 1000)
+ if value.tzinfo is not None:
+ utc_offset = value.tzinfo.utcoffset(value)
+ if utc_offset is not None:
+ message.time_zone_offset = int(
+ util.timedelta_totalseconds(value.tzinfo.utcoffset(value)) / 60)
+
+ return message
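
Aside, not part of the diff: DateTimeField round-trips a datetime through the
DateTimeMessage defined above. A small sketch; the field number is arbitrary
and the datetime is naive, so no time_zone_offset is recorded:

    import datetime
    from gslib.third_party.protorpc import message_types

    field = message_types.DateTimeField(1)
    msg = field.value_to_message(datetime.datetime(2014, 11, 3, 12, 38, 28))
    # msg.milliseconds now holds milliseconds since the epoch.
    restored = field.value_from_message(msg)  # back to a datetime instance
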
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/third_party/protorpc/messages.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,1847 @@
+#!/usr/bin/env python
+#
+# Copyright 2010 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Stand-alone implementation of in memory protocol messages.
+
+Public Classes:
+ Enum: Represents an enumerated type.
+ Variant: Hint for wire format to determine how to serialize.
+ Message: Base class for user defined messages.
+ IntegerField: Field for integer values.
+ FloatField: Field for float values.
+ BooleanField: Field for boolean values.
+ BytesField: Field for binary string values.
+ StringField: Field for UTF-8 string values.
+ MessageField: Field for other message type values.
+ EnumField: Field for enumerated type values.
+
+Public Exceptions (indentation indicates class hierarchy):
+ EnumDefinitionError: Raised when enumeration is incorrectly defined.
+ FieldDefinitionError: Raised when field is incorrectly defined.
+ InvalidVariantError: Raised when variant is not compatible with field type.
+ InvalidDefaultError: Raised when default is not compatible with field.
+ InvalidNumberError: Raised when field number is out of range or reserved.
+ MessageDefinitionError: Raised when message is incorrectly defined.
+ DuplicateNumberError: Raised when field has duplicate number with another.
+ ValidationError: Raised when a message or field is not valid.
+ DefinitionNotFoundError: Raised when definition not found.
+"""
+
+__author__ = 'ra...@google.com (Rafe Kaplan)'
+
+
+import inspect
+import os
+import sys
+import traceback
+import types
+import weakref
+
+from gslib.third_party.protorpc import util
+
+__all__ = ['MAX_ENUM_VALUE',
+ 'MAX_FIELD_NUMBER',
+ 'FIRST_RESERVED_FIELD_NUMBER',
+ 'LAST_RESERVED_FIELD_NUMBER',
+
+ 'Enum',
+ 'Field',
+ 'FieldList',
+ 'Variant',
+ 'Message',
+ 'IntegerField',
+ 'FloatField',
+ 'BooleanField',
+ 'BytesField',
+ 'StringField',
+ 'MessageField',
+ 'EnumField',
+ 'find_definition',
+
+ 'Error',
+ 'DecodeError',
+ 'EncodeError',
+ 'EnumDefinitionError',
+ 'FieldDefinitionError',
+ 'InvalidVariantError',
+ 'InvalidDefaultError',
+ 'InvalidNumberError',
+ 'MessageDefinitionError',
+ 'DuplicateNumberError',
+ 'ValidationError',
+ 'DefinitionNotFoundError',
+ ]
+
+
+# TODO(rafek): Add extended module test to ensure all exceptions
+# in services extends Error.
+Error = util.Error
+
+
+class EnumDefinitionError(Error):
+ """Enumeration definition error."""
+
+
+class FieldDefinitionError(Error):
+ """Field definition error."""
+
+
+class InvalidVariantError(FieldDefinitionError):
+ """Invalid variant provided to field."""
+
+
+class InvalidDefaultError(FieldDefinitionError):
+ """Invalid default provided to field."""
+
+
+class InvalidNumberError(FieldDefinitionError):
+ """Invalid number provided to field."""
+
+
+class MessageDefinitionError(Error):
+ """Message definition error."""
+
+
+class DuplicateNumberError(Error):
+ """Duplicate number assigned to field."""
+
+
+class DefinitionNotFoundError(Error):
+ """Raised when definition is not found."""
+
+
+class DecodeError(Error):
+ """Error found decoding message from encoded form."""
+
+
+class EncodeError(Error):
+ """Error found when encoding message."""
+
+
+class ValidationError(Error):
+ """Invalid value for message error."""
+
+ def __str__(self):
+ """Prints string with field name if present on exception."""
+ message = Error.__str__(self)
+ try:
+ field_name = self.field_name
+ except AttributeError:
+ return message
+ else:
+ return message
+
+
+# Attributes that are reserved by a class definition that
+# may not be used by either Enum or Message class definitions.
+_RESERVED_ATTRIBUTE_NAMES = frozenset(
+ ['__module__', '__doc__'])
+
+_POST_INIT_FIELD_ATTRIBUTE_NAMES = frozenset(
+ ['name',
+ '_message_definition',
+ '_MessageField__type',
+ '_EnumField__type',
+ '_EnumField__resolved_default'])
+
+_POST_INIT_ATTRIBUTE_NAMES = frozenset(
+ ['_message_definition'])
+
+# Maximum enumeration value as defined by the protocol buffers standard.
+# All enum values must be less than or equal to this value.
+MAX_ENUM_VALUE = (2 ** 29) - 1
+
+# Maximum field number as defined by the protocol buffers standard.
+# All field numbers must be less than or equal to this value.
+MAX_FIELD_NUMBER = (2 ** 29) - 1
+
+# Field numbers between 19000 and 19999 inclusive are reserved by the
+# protobuf protocol and may not be used by fields.
+FIRST_RESERVED_FIELD_NUMBER = 19000
+LAST_RESERVED_FIELD_NUMBER = 19999
+
+
+class _DefinitionClass(type):
+ """Base meta-class used for definition meta-classes.
+
+ The Enum and Message definition classes share some basic functionality.
+ Both of these classes may be contained by a Message definition. After
+ initialization, neither class may have attributes changed
+ except for the protected _message_definition attribute, and that attribute
+ may change only once.
+ """
+
+ __initialized = False
+
+ def __init__(cls, name, bases, dct):
+ """Constructor."""
+ type.__init__(cls, name, bases, dct)
+ # Base classes may never be initialized.
+ if cls.__bases__ != (object,):
+ cls.__initialized = True
+
+ def message_definition(cls):
+ """Get outer Message definition that contains this definition.
+
+ Returns:
+ Containing Message definition if definition is contained within one,
+ else None.
+ """
+ try:
+ return cls._message_definition()
+ except AttributeError:
+ return None
+
+ def __setattr__(cls, name, value):
+ """Overridden so that cannot set variables on definition classes after
init.
+
+ Setting attributes on a class must work during the period of
initialization
+ to set the enumation value class variables and build the name/number
maps.
+ Once __init__ has set the __initialized flag to True prohibits setting
any
+ more values on the class. The class is in effect frozen.
+
+ Args:
+ name: Name of value to set.
+ value: Value to set.
+ """
+ if cls.__initialized and name not in _POST_INIT_ATTRIBUTE_NAMES:
+ raise AttributeError('May not change values: %s' % name)
+ else:
+ type.__setattr__(cls, name, value)
+
+ def __delattr__(cls, name):
+ """Overridden so that cannot delete varaibles on definition classes."""
+ raise TypeError('May not delete attributes on definition class')
+
+ def definition_name(cls):
+ """Helper method for creating definition name.
+
+ Names will be generated to include the class's package name, scope (if the
+ class is nested in another definition) and class name.
+
+ By default, the package name for a definition is derived from its module
+ name. However, this value can be overridden by placing a 'package' attribute
+ in the module that contains the definition class. For example:
+
+ package = 'some.alternate.package'
+
+ class MyMessage(Message):
+ ...
+
+ >>> MyMessage.definition_name()
+ some.alternate.package.MyMessage
+
+ Returns:
+ Dot-separated fully qualified name of definition.
+ """
+ outer_definition_name = cls.outer_definition_name()
+ if outer_definition_name is None:
+ return unicode(cls.__name__)
+ else:
+ return u'%s.%s' % (outer_definition_name, cls.__name__)
+
+ def outer_definition_name(cls):
+ """Helper method for creating outer definition name.
+
+ Returns:
+ If definition is nested, will return the outer definition's name, else the
+ package name.
+ """
+ outer_definition = cls.message_definition()
+ if not outer_definition:
+ return util.get_package_for_module(cls.__module__)
+ else:
+ return outer_definition.definition_name()
+
+ def definition_package(cls):
+ """Helper method for creating creating the package of a definition.
+
+ Returns:
+ Name of package that definition belongs to.
+ """
+ outer_definition = cls.message_definition()
+ if not outer_definition:
+ return util.get_package_for_module(cls.__module__)
+ else:
+ return outer_definition.definition_package()
+
+
+class _EnumClass(_DefinitionClass):
+ """Meta-class used for defining the Enum base class.
+
+ Meta-class enables very specific behavior for any defined Enum
+ class. All attributes defined on an Enum sub-class must be integers.
+ Each attribute defined on an Enum sub-class is translated
+ into an instance of that sub-class, with the name of the attribute
+ as its name, and the number provided as its value. It also ensures
+ that only one level of Enum class hierarchy is possible. In other
+ words it is not possible to declare sub-classes of sub-classes of
+ Enum.
+
+ This class also defines some functions in order to restrict the
+ behavior of the Enum class and its sub-classes. It is not possible
+ to change the behavior of the Enum class in later classes since
+ any new classes may be defined with only integer values, and no methods.
+ """
+
+ def __init__(cls, name, bases, dct):
+ # Can only define one level of sub-classes below Enum.
+ if not (bases == (object,) or bases == (Enum,)):
+ raise EnumDefinitionError('Enum type %s may only inherit from Enum' %
+ (name,))
+
+ cls.__by_number = {}
+ cls.__by_name = {}
+
+ # Enum base class does not need to be initialized or locked.
+ if bases != (object,):
+ # Replace integer with number.
+ for attribute, value in dct.iteritems():
+
+ # Module will be in every enum class.
+ if attribute in _RESERVED_ATTRIBUTE_NAMES:
+ continue
+
+ # Reject anything that is not an int.
+ if not isinstance(value, (int, long)):
+ raise EnumDefinitionError(
+ 'May only use integers in Enum definitions. Found: %s = %s' %
+ (attribute, value))
+
+ # Protocol buffer standard recommends non-negative values.
+ # Reject negative values.
+ if value < 0:
+ raise EnumDefinitionError(
+ 'Must use non-negative enum values. Found: %s = %d' %
+ (attribute, value))
+
+ if value > MAX_ENUM_VALUE:
+ raise EnumDefinitionError(
+ 'Must use enum values less than or equal %d. Found: %s = %d' %
+ (MAX_ENUM_VALUE, attribute, value))
+
+ if value in cls.__by_number:
+ raise EnumDefinitionError(
+ 'Value for %s = %d is already defined: %s' %
+ (attribute, value, cls.__by_number[value].name))
+
+ # Create enum instance and list in new Enum type.
+ instance = object.__new__(cls)
+ cls.__init__(instance, attribute, value)
+ cls.__by_name[instance.name] = instance
+ cls.__by_number[instance.number] = instance
+ setattr(cls, attribute, instance)
+
+ _DefinitionClass.__init__(cls, name, bases, dct)
+
+ def __iter__(cls):
+ """Iterate over all values of enum.
+
+ Yields:
+ Enumeration instances of the Enum class in arbitrary order.
+ """
+ return cls.__by_number.itervalues()
+
+ def names(cls):
+ """Get all names for Enum.
+
+ Returns:
+ An iterator for names of the enumeration in arbitrary order.
+ """
+ return cls.__by_name.iterkeys()
+
+ def numbers(cls):
+ """Get all numbers for Enum.
+
+ Returns:
+ An iterator for all numbers of the enumeration in arbitrary order.
+ """
+ return cls.__by_number.iterkeys()
+
+ def lookup_by_name(cls, name):
+ """Look up Enum by name.
+
+ Args:
+ name: Name of enum to find.
+
+ Returns:
+ Enum sub-class instance of that value.
+ """
+ return cls.__by_name[name]
+
+ def lookup_by_number(cls, number):
+ """Look up Enum by number.
+
+ Args:
+ number: Number of enum to find.
+
+ Returns:
+ Enum sub-class instance of that value.
+ """
+ return cls.__by_number[number]
+
+ def __len__(cls):
+ return len(cls.__by_name)
+
+
+class Enum(object):
+ """Base class for all enumerated types."""
+
+ __metaclass__ = _EnumClass
+
+ __slots__ = set(('name', 'number'))
+
+ def __new__(cls, index):
+ """Acts as look-up routine after class is initialized.
+
+ The purpose of overriding __new__ is to provide a way to treat
+ Enum subclasses as casting types, similar to how the int type
+ functions. A program can pass a string or an integer and this
+ method with "convert" that value in to an appropriate Enum instance.
+
+ Args:
+ index: Name or number to look up. During initialization
+ this is always the name of the new enum value.
+
+ Raises:
+ TypeError: When an inappropriate index value is provided.
+ """
+ # If is enum type of this class, return it.
+ if isinstance(index, cls):
+ return index
+
+ # If number, look up by number.
+ if isinstance(index, (int, long)):
+ try:
+ return cls.lookup_by_number(index)
+ except KeyError:
+ pass
+
+ # If name, look up by name.
+ if isinstance(index, basestring):
+ try:
+ return cls.lookup_by_name(index)
+ except KeyError:
+ pass
+
+ raise TypeError('No such value for %s in Enum %s' %
+ (index, cls.__name__))
+
+ def __init__(self, name, number=None):
+ """Initialize new Enum instance.
+
+ Since this should only be called during class initialization any
+ calls that happen after the class is frozen raises an exception.
+ """
+ # Immediately return if __init__ was called after _Enum.__init__().
+ # It means that casting operator version of the class constructor
+ # is being used.
+ if getattr(type(self), '_DefinitionClass__initialized'):
+ return
+ object.__setattr__(self, 'name', name)
+ object.__setattr__(self, 'number', number)
+
+ def __setattr__(self, name, value):
+ raise TypeError('May not change enum values')
+
+ def __str__(self):
+ return self.name
+
+ def __int__(self):
+ return self.number
+
+ def __repr__(self):
+ return '%s(%s, %d)' % (type(self).__name__, self.name, self.number)
+
+ def __cmp__(self, other):
+ """Order is by number."""
+ if isinstance(other, type(self)):
+ return cmp(self.number, other.number)
+ return NotImplemented
+
+ @classmethod
+ def to_dict(cls):
+ """Make dictionary version of enumerated class.
+
+ Dictionary created this way can be used with def_num.
+
+ Returns:
+ A dict (name) -> number
+ """
+ return dict((item.name, item.number) for item in iter(cls))
+
+ @staticmethod
+ def def_enum(dct, name):
+ """Define enum class from dictionary.
+
+ Args:
+ dct: Dictionary of enumerated values for type.
+ name: Name of enum.
+ """
+ return type(name, (Enum,), dct)
+
+
+# TODO(rafek): Determine to what degree this enumeration should be compatible
+# with FieldDescriptor.Type in:
+#
+# http://code.google.com/p/protobuf/source/browse/trunk/src/google/protobuf/descriptor.proto
+class Variant(Enum):
+ """Wire format variant.
+
+ Used by the 'protobuf' wire format to determine how to transmit
+ a single piece of data. May be used by other formats.
+
+ See: http://code.google.com/apis/protocolbuffers/docs/encoding.html
+
+ Values:
+ DOUBLE: 64-bit floating point number.
+ FLOAT: 32-bit floating point number.
+ INT64: 64-bit signed integer.
+ UINT64: 64-bit unsigned integer.
+ INT32: 32-bit signed integer.
+ BOOL: Boolean value (True or False).
+ STRING: String of UTF-8 encoded text.
+ MESSAGE: Embedded message as byte string.
+ BYTES: String of 8-bit bytes.
+ UINT32: 32-bit unsigned integer.
+ ENUM: Enum value as integer.
+ SINT32: 32-bit signed integer. Uses "zig-zag" encoding.
+ SINT64: 64-bit signed integer. Uses "zig-zag" encoding.
+ """
+ DOUBLE = 1
+ FLOAT = 2
+ INT64 = 3
+ UINT64 = 4
+ INT32 = 5
+ BOOL = 8
+ STRING = 9
+ MESSAGE = 11
+ BYTES = 12
+ UINT32 = 13
+ ENUM = 14
+ SINT32 = 17
+ SINT64 = 18
+
+
+class _MessageClass(_DefinitionClass):
+ """Meta-class used for defining the Message base class.
+
+ For more details about Message classes, see the Message class docstring.
+ Information contained there may help understanding this class.
+
+ Meta-class enables very specific behavior for any defined Message
+ class. All attributes defined on a Message sub-class must be field
+ instances, Enum class definitions or other Message class definitions. Each
+ field attribute defined on a Message sub-class is added to the set of
+ field definitions and the attribute is translated into a slot. It also
+ ensures that only one level of Message class hierarchy is possible. In other
+ words it is not possible to declare sub-classes of sub-classes of
+ Message.
+
+ This class also defines some functions in order to restrict the
+ behavior of the Message class and its sub-classes. It is not possible
+ to change the behavior of the Message class in later classes since
+ any new classes may be defined with only field, Enums and Messages, and
+ no methods.
+ """
+
+ def __new__(cls, name, bases, dct):
+ """Create new Message class instance.
+
+ The __new__ method of the _MessageClass type is overridden so as to
+ allow the translation of Field instances to slots.
+ """
+ by_number = {}
+ by_name = {}
+
+ variant_map = {}
+
+ if bases != (object,):
+ # Can only define one level of sub-classes below Message.
+ if bases != (Message,):
+ raise MessageDefinitionError(
+ 'Message types may only inherit from Message')
+
+ enums = []
+ messages = []
+ # Must not use iteritems because this loop will change the state of dct.
+ for key, field in dct.items():
+
+ if key in _RESERVED_ATTRIBUTE_NAMES:
+ continue
+
+ if isinstance(field, type) and issubclass(field, Enum):
+ enums.append(key)
+ continue
+
+ if (isinstance(field, type) and
+ issubclass(field, Message) and
+ field is not Message):
+ messages.append(key)
+ continue
+
+ # Reject anything that is not a field.
+ if type(field) is Field or not isinstance(field, Field):
+ raise MessageDefinitionError(
+ 'May only use fields in message definitions. Found: %s = %s' %
+ (key, field))
+
+ if field.number in by_number:
+ raise DuplicateNumberError(
+ 'Field with number %d declared more than once in %s' %
+ (field.number, name))
+
+ field.name = key
+
+ # Place in name and number maps.
+ by_name[key] = field
+ by_number[field.number] = field
+
+ # Add enums if any exist.
+ if enums:
+ dct['__enums__'] = sorted(enums)
+
+ # Add messages if any exist.
+ if messages:
+ dct['__messages__'] = sorted(messages)
+
+ dct['_Message__by_number'] = by_number
+ dct['_Message__by_name'] = by_name
+
+ return _DefinitionClass.__new__(cls, name, bases, dct)
+
+ def __init__(cls, name, bases, dct):
+ """Initializer required to assign references to new class."""
+ if bases != (object,):
+ for value in dct.itervalues():
+ if isinstance(value, _DefinitionClass) and not value is Message:
+ value._message_definition = weakref.ref(cls)
+
+ for field in cls.all_fields():
+ field._message_definition = weakref.ref(cls)
+
+ _DefinitionClass.__init__(cls, name, bases, dct)
+
+
+class Message(object):
+ """Base class for user defined message objects.
+
+ Used to define messages for efficient transmission across network or
+ process space. Messages are defined using the field classes (IntegerField,
+ FloatField, EnumField, etc.).
+
+ Messages are more restricted than normal classes in that they may only
+ contain field attributes and other Message and Enum definitions. These
+ restrictions are in place because the structure of the Message class is
+ intended to itself be transmitted across network or process space and
+ used directly by clients or even other servers. As such, methods and
+ non-field attributes could not be transmitted with the structural information,
+ causing discrepancies between different languages and implementations.
+
+ Initialization and validation:
+
+ A Message object is considered to be initialized if it has all required
+ fields and any nested messages are also initialized.
+
+ Calling 'check_initialized' will raise a ValidationException if it is not
+ initialized; 'is_initialized' returns a boolean value indicating if it is
+ valid.
+
+ Validation automatically occurs when Message objects are created
+ and populated. Validation that a given value will be compatible with
+ a field that it is assigned to can be done through the Field instance's
+ validate() method. The validate method used on a message will check that
+ all values of a message and its sub-messages are valid. Assigning an
+ invalid value to a field will raise a ValidationException.
+
+ Example:
+
+ # Trade type.
+ class TradeType(Enum):
+ BUY = 1
+ SELL = 2
+ SHORT = 3
+ CALL = 4
+
+ class Lot(Message):
+ price = IntegerField(1, required=True)
+ quantity = IntegerField(2, required=True)
+
+ class Order(Message):
+ symbol = StringField(1, required=True)
+ total_quantity = IntegerField(2, required=True)
+ trade_type = EnumField(TradeType, 3, required=True)
+ lots = MessageField(Lot, 4, repeated=True)
+ limit = IntegerField(5)
+
+ order = Order(symbol='GOOG',
+ total_quantity=10,
+ trade_type=TradeType.BUY)
+
+ lot1 = Lot(price=304,
+ quantity=7)
+
+ lot2 = Lot(price = 305,
+ quantity=3)
+
+ order.lots = [lot1, lot2]
+
+ # Now object is initialized!
+ order.check_initialized()
+ """
+
+ __metaclass__ = _MessageClass
+
+ def __init__(self, **kwargs):
+ """Initialize internal messages state.
+
+ Args:
+ A message can be initialized via the constructor by passing in keyword
+ arguments corresponding to fields. For example:
+
+ class Date(Message):
+ day = IntegerField(1)
+ month = IntegerField(2)
+ year = IntegerField(3)
+
+ Invoking:
+
+ date = Date(day=6, month=6, year=1911)
+
+ is the same as doing:
+
+ date = Date()
+ date.day = 6
+ date.month = 6
+ date.year = 1911
+ """
+ # Tag being an essential implementation detail must be private.
+ self.__tags = {}
+ self.__unrecognized_fields = {}
+
+ assigned = set()
+ for name, value in kwargs.iteritems():
+ setattr(self, name, value)
+ assigned.add(name)
+
+ # initialize repeated fields.
+ for field in self.all_fields():
+ if field.repeated and field.name not in assigned:
+ setattr(self, field.name, [])
+
+
+ def check_initialized(self):
+ """Check class for initialization status.
+
+ Check that all required fields are initialized
+
+ Raises:
+ ValidationError: If message is not initialized.
+ """
+ for name, field in self.__by_name.iteritems():
+ value = getattr(self, name)
+ if value is None:
+ if field.required:
+ raise ValidationError("Message %s is missing required field %s" %
+ (type(self).__name__, name))
+ else:
+ try:
+ if (isinstance(field, MessageField) and
+ issubclass(field.message_type, Message)):
+ if field.repeated:
+ for item in value:
+ item_message_value = field.value_to_message(item)
+ item_message_value.check_initialized()
+ else:
+ message_value = field.value_to_message(value)
+ message_value.check_initialized()
+ except ValidationError, err:
+ if not hasattr(err, 'message_name'):
+ err.message_name = type(self).__name__
+ raise
+
+ def is_initialized(self):
+ """Get initialization status.
+
+ Returns:
+ True if message is valid, else False.
+ """
+ try:
+ self.check_initialized()
+ except ValidationError:
+ return False
+ else:
+ return True
+
+ @classmethod
+ def all_fields(cls):
+ """Get all field definition objects.
+
+ Ordering is arbitrary.
+
+ Returns:
+ Iterator over all values in arbitrary order.
+ """
+ return cls.__by_name.itervalues()
+
+ @classmethod
+ def field_by_name(cls, name):
+ """Get field by name.
+
+ Returns:
+ Field object associated with name.
+
+ Raises:
+ KeyError if no field found by that name.
+ """
+ return cls.__by_name[name]
+
+ @classmethod
+ def field_by_number(cls, number):
+ """Get field by number.
+
+ Returns:
+ Field object associated with number.
+
+ Raises:
+ KeyError if no field found by that number.
+ """
+ return cls.__by_number[number]
+
+ def get_assigned_value(self, name):
+ """Get the assigned value of an attribute.
+
+ Get the underlying value of an attribute. If value has not been set, will
+ not return the default for the field.
+
+ Args:
+ name: Name of attribute to get.
+
+ Returns:
+ Value of attribute, None if it has not been set.
+ """
+ message_type = type(self)
+ try:
+ field = message_type.field_by_name(name)
+ except KeyError:
+ raise AttributeError('Message %s has no field %s' % (
+ message_type.__name__, name))
+ return self.__tags.get(field.number)
+
+ def reset(self, name):
+ """Reset assigned value for field.
+
+ Resetting a field will return it to its default value or None.
+
+ Args:
+ name: Name of field to reset.
+ """
+ message_type = type(self)
+ try:
+ field = message_type.field_by_name(name)
+ except KeyError:
+ if name not in message_type.__by_name:
+ raise AttributeError('Message %s has no field %s' % (
+ message_type.__name__, name))
+ self.__tags.pop(field.number, None)
+
+ def all_unrecognized_fields(self):
+ """Get the names of all unrecognized fields in this message."""
+ return self.__unrecognized_fields.keys()
+
+ def get_unrecognized_field_info(self, key, value_default=None,
+ variant_default=None):
+ """Get the value and variant of an unknown field in this message.
+
+ Args:
+ key: The name or number of the field to retrieve.
+ value_default: Value to be returned if the key isn't found.
+ variant_default: Value to be returned as variant if the key isn't
+ found.
+
+ Returns:
+ (value, variant), where value and variant are whatever was passed
+ to set_unrecognized_field.
+ """
+ value, variant = self.__unrecognized_fields.get(key, (value_default,
+ variant_default))
+ return value, variant
+
+ def set_unrecognized_field(self, key, value, variant):
+ """Set an unrecognized field, used when decoding a message.
+
+ Args:
+ key: The name or number used to refer to this unknown value.
+ value: The value of the field.
+ variant: Type information needed to interpret the value or re-encode it.
+
+ Raises:
+ TypeError: If the variant is not an instance of messages.Variant.
+ """
+ if not isinstance(variant, Variant):
+ raise TypeError('Variant type %s is not valid.' % variant)
+ self.__unrecognized_fields[key] = value, variant
+
+ def __setattr__(self, name, value):
+ """Change set behavior for messages.
+
+ Messages may only be assigned values that are fields.
+
+ Does not try to validate field when set.
+
+ Args:
+ name: Name of field to assign to.
+ value: Value to assign to field.
+
+ Raises:
+ AttributeError when trying to assign value that is not a field.
+ """
+ if name in self.__by_name or name.startswith('_Message__'):
+ object.__setattr__(self, name, value)
+ else:
+ raise AttributeError("May not assign arbitrary value %s "
+ "to message %s" % (name, type(self).__name__))
+
+ def __repr__(self):
+ """Make string representation of message.
+
+ Example:
+
+ class MyMessage(messages.Message):
+ integer_value = messages.IntegerField(1)
+ string_value = messages.StringField(2)
+
+ my_message = MyMessage()
+ my_message.integer_value = 42
+ my_message.string_value = u'A string'
+
+ print my_message
+ >>> <MyMessage
+ ... integer_value: 42
+ ... string_value: u'A string'>
+
+ Returns:
+ String representation of message, including the values
+ of all fields and repr of all sub-messages.
+ """
+ body = ['<', type(self).__name__]
+ for field in sorted(self.all_fields(),
+ key=lambda f: f.number):
+ attribute = field.name
+ value = self.get_assigned_value(field.name)
+ if value is not None:
+ body.append('\n %s: %s' % (attribute, repr(value)))
+ body.append('>')
+ return ''.join(body)
+
+ def __eq__(self, other):
+ """Equality operator.
+
+ Does field by field comparison with other message. For
+ equality, must be same type and values of all fields must be
+ equal.
+
+ Messages not required to be initialized for comparison.
+
+ Does not attempt to determine equality for values that have
+ default values that are not set. In other words:
+
+ class HasDefault(Message):
+
+ attr1 = StringField(1, default='default value')
+
+ message1 = HasDefault()
+ message2 = HasDefault()
+ message2.attr1 = 'default value'
+
+ message1 != message2
+
+ Does not compare unknown values.
+
+ Args:
+ other: Other message to compare with.
+ """
+ # TODO(rafek): Implement "equivalent" which does comparisons
+ # taking default values in to consideration.
+ if self is other:
+ return True
+
+ if type(self) is not type(other):
+ return False
+
+ return self.__tags == other.__tags
+
+ def __ne__(self, other):
+ """Not equals operator.
+
+ Does field by field comparison with other message. For
+ non-equality, must be different type or any value of a field must be
+ non-equal to the same field in the other instance.
+
+ Messages not required to be initialized for comparison.
+
+ Args:
+ other: Other message to compare with.
***The diff for this file has been truncated for email.***
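
Aside, not part of the diff: even with the remainder of messages.py truncated,
the pieces above are enough to sketch typical use of Enum and Message; the
class and field names below are illustrative only:

    from gslib.third_party.protorpc import messages

    class Color(messages.Enum):
        RED = 1
        GREEN = 2

    class Pixel(messages.Message):
        color = messages.EnumField(Color, 1, required=True)
        x = messages.IntegerField(2, required=True)
        y = messages.IntegerField(3, required=True)

    p = Pixel(color=Color.RED, x=10, y=20)
    p.check_initialized()              # passes: all required fields are set
    assert Color('GREEN') == Color(2)  # Enum subclasses act as casting types
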
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/third_party/protorpc/protojson.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,362 @@
+#!/usr/bin/env python
+#
+# Copyright 2010 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""JSON support for message types.
+
+Public classes:
+ MessageJSONEncoder: JSON encoder for message objects.
+
+Public functions:
+ encode_message: Encodes a message in to a JSON string.
+ decode_message: Merge from a JSON string in to a message.
+"""
+
+__author__ = 'ra...@google.com (Rafe Kaplan)'
+
+import cStringIO
+import base64
+import logging
+
+from gslib.third_party.protorpc import message_types
+from gslib.third_party.protorpc import messages
+from gslib.third_party.protorpc import util
+
+__all__ = [
+ 'ALTERNATIVE_CONTENT_TYPES',
+ 'CONTENT_TYPE',
+ 'MessageJSONEncoder',
+ 'encode_message',
+ 'decode_message',
+ 'ProtoJson',
+]
+
+
+def _load_json_module():
+ """Try to load a valid json module.
+
+ There are more than one json modules that might be installed. They are
+ mostly compatible with one another but some versions may be different.
+ This function attempts to load various json modules in a preferred order.
+ It does a basic check to guess if a loaded version of json is compatible.
+
+ Returns:
+ Compatible json module.
+
+ Raises:
+ ImportError if there are no json modules or the loaded json module is
+ not compatible with ProtoRPC.
+ """
+ first_import_error = None
+ for module_name in ['json',
+ 'simplejson']:
+ try:
+ module = __import__(module_name, {}, {}, 'json')
+ if not hasattr(module, 'JSONEncoder'):
+ message = ('json library "%s" is not compatible with ProtoRPC' %
+ module_name)
+ logging.warning(message)
+ raise ImportError(message)
+ else:
+ return module
+ except ImportError, err:
+ if not first_import_error:
+ first_import_error = err
+
+ logging.error('Must use valid json library (Python 2.6 json or simplejson)')
+ raise first_import_error
+json = _load_json_module()
+
+
+# TODO: Rename this to MessageJsonEncoder.
+class MessageJSONEncoder(json.JSONEncoder):
+ """Message JSON encoder class.
+
+ Extension of JSONEncoder that can build JSON from a message object.
+ """
+
+ def __init__(self, protojson_protocol=None, **kwargs):
+ """Constructor.
+
+ Args:
+ protojson_protocol: ProtoJson instance.
+ """
+ super(MessageJSONEncoder, self).__init__(**kwargs)
+ self.__protojson_protocol = protojson_protocol or ProtoJson.get_default()
+
+ def default(self, value):
+ """Return dictionary instance from a message object.
+
+ Args:
+ value: Value to get dictionary for. If not encodable, will
+ call superclasses default method.
+ """
+ if isinstance(value, messages.Enum):
+ return str(value)
+
+ if isinstance(value, messages.Message):
+ result = {}
+ for field in value.all_fields():
+ item = value.get_assigned_value(field.name)
+ if item not in (None, [], ()):
+ result[field.name] = self.__protojson_protocol.encode_field(
+ field, item)
+ # Handle unrecognized fields, so they're included when a message is
+ # decoded then encoded.
+ for unknown_key in value.all_unrecognized_fields():
+ unrecognized_field, _ = value.get_unrecognized_field_info(unknown_key)
+ result[unknown_key] = unrecognized_field
+ return result
+ else:
+ return super(MessageJSONEncoder, self).default(value)
+
+
+class ProtoJson(object):
+ """ProtoRPC JSON implementation class.
+
+ Implementation of JSON based protocol used for serializing and deserializing
+ message objects. Instances of remote.ProtocolConfig constructor or used with
+ remote.Protocols.add_protocol. See the remote.py module for more details.
+ """
+
+ CONTENT_TYPE = 'application/json'
+ ALTERNATIVE_CONTENT_TYPES = [
+ 'application/x-javascript',
+ 'text/javascript',
+ 'text/x-javascript',
+ 'text/x-json',
+ 'text/json',
+ ]
+
+ def encode_field(self, field, value):
+ """Encode a python field value to a JSON value.
+
+ Args:
+ field: A ProtoRPC field instance.
+ value: A python value supported by field.
+
+ Returns:
+ A JSON serializable value appropriate for field.
+ """
+ if isinstance(field, messages.BytesField):
+ if field.repeated:
+ value = [base64.b64encode(byte) for byte in value]
+ else:
+ value = base64.b64encode(value)
+ elif isinstance(field, message_types.DateTimeField):
+ # DateTimeField stores its data as a RFC 3339 compliant string.
+ if field.repeated:
+ value = [i.isoformat() for i in value]
+ else:
+ value = value.isoformat()
+ return value
+
+ def encode_message(self, message):
+ """Encode Message instance to JSON string.
+
+ Args:
+ Message instance to encode in to JSON string.
+
+ Returns:
+ String encoding of Message instance in protocol JSON format.
+
+ Raises:
+ messages.ValidationError if message is not initialized.
+ """
+ message.check_initialized()
+
+ return json.dumps(message, cls=MessageJSONEncoder, protojson_protocol=self)
+
+ def decode_message(self, message_type, encoded_message):
+ """Merge JSON structure to Message instance.
+
+ Args:
+ message_type: Message to decode data to.
+ encoded_message: JSON encoded version of message.
+
+ Returns:
+ Decoded instance of message_type.
+
+ Raises:
+ ValueError: If encoded_message is not valid JSON.
+ messages.ValidationError if merged message is not initialized.
+ """
+ if not encoded_message.strip():
+ return message_type()
+
+ dictionary = json.loads(encoded_message)
+ message = self.__decode_dictionary(message_type, dictionary)
+ message.check_initialized()
+ return message
+
+ def __find_variant(self, value):
+ """Find the messages.Variant type that describes this value.
+
+ Args:
+ value: The value whose variant type is being determined.
+
+ Returns:
+ The messages.Variant value that best describes value's type, or None if
+ it's a type we don't know how to handle.
+ """
+ if isinstance(value, bool):
+ return messages.Variant.BOOL
+ elif isinstance(value, (int, long)):
+ return messages.Variant.INT64
+ elif isinstance(value, float):
+ return messages.Variant.DOUBLE
+ elif isinstance(value, basestring):
+ return messages.Variant.STRING
+ elif isinstance(value, (list, tuple)):
+ # Find the most specific variant that covers all elements.
+ variant_priority = [None, messages.Variant.INT64, messages.Variant.DOUBLE,
+ messages.Variant.STRING]
+ chosen_priority = 0
+ for v in value:
+ variant = self.__find_variant(v)
+ try:
+ priority = variant_priority.index(variant)
+ except IndexError:
+ priority = -1
+ if priority > chosen_priority:
+ chosen_priority = priority
+ return variant_priority[chosen_priority]
+ # Unrecognized type.
+ return None
+
+ def __decode_dictionary(self, message_type, dictionary):
+ """Merge dictionary in to message.
+
+ Args:
+ message: Message to merge dictionary in to.
+ dictionary: Dictionary to extract information from. Dictionary
+ is as parsed from JSON. Nested objects will also be dictionaries.
+ """
+ message = message_type()
+ for key, value in dictionary.iteritems():
+ if value is None:
+ try:
+ message.reset(key)
+ except AttributeError:
+ pass # This is an unrecognized field, skip it.
+ continue
+
+ try:
+ field = message.field_by_name(key)
+ except KeyError:
+ # Save unknown values.
+ variant = self.__find_variant(value)
+ if variant:
+ if key.isdigit():
+ key = int(key)
+ message.set_unrecognized_field(key, value, variant)
+ else:
+ logging.warning('No variant found for unrecognized field: %s', key)
+ continue
+
+ # Normalize values in to a list.
+ if isinstance(value, list):
+ if not value:
+ continue
+ else:
+ value = [value]
+
+ valid_value = []
+ for item in value:
+ valid_value.append(self.decode_field(field, item))
+
+ if field.repeated:
+ existing_value = getattr(message, field.name)
+ setattr(message, field.name, valid_value)
+ else:
+ setattr(message, field.name, valid_value[-1])
+ return message
+
+ def decode_field(self, field, value):
+ """Decode a JSON value to a python value.
+
+ Args:
+ field: A ProtoRPC field instance.
+ value: A serialized JSON value.
+
+ Return:
+ A Python value compatible with field.
+ """
+ if isinstance(field, messages.EnumField):
+ try:
+ return field.type(value)
+ except TypeError:
+ raise messages.DecodeError('Invalid enum value "%s"' % value[0])
+
+ elif isinstance(field, messages.BytesField):
+ try:
+ return base64.b64decode(value)
+ except TypeError, err:
+ raise messages.DecodeError('Base64 decoding error: %s' % err)
+
+ elif isinstance(field, message_types.DateTimeField):
+ try:
+ return util.decode_datetime(value)
+ except ValueError, err:
+ raise messages.DecodeError(err)
+
+ elif (isinstance(field, messages.MessageField) and
+ issubclass(field.type, messages.Message)):
+ return self.__decode_dictionary(field.type, value)
+
+ elif (isinstance(field, messages.FloatField) and
+ isinstance(value, (int, long, basestring))):
+ try:
+ return float(value)
+ except:
+ pass
+
+ elif (isinstance(field, messages.IntegerField) and
+ isinstance(value, basestring)):
+ try:
+ return int(value)
+ except:
+ pass
+
+ return value
+
+ @staticmethod
+ def get_default():
+ """Get default instanceof ProtoJson."""
+ try:
+ return ProtoJson.__default
+ except AttributeError:
+ ProtoJson.__default = ProtoJson()
+ return ProtoJson.__default
+
+ @staticmethod
+ def set_default(protocol):
+ """Set the default instance of ProtoJson.
+
+ Args:
+ protocol: A ProtoJson instance.
+ """
+ if not isinstance(protocol, ProtoJson):
+ raise TypeError('Expected protocol of type ProtoJson')
+ ProtoJson.__default = protocol
+
+CONTENT_TYPE = ProtoJson.CONTENT_TYPE
+
+ALTERNATIVE_CONTENT_TYPES = ProtoJson.ALTERNATIVE_CONTENT_TYPES
+
+encode_message = ProtoJson.get_default().encode_message
+
+decode_message = ProtoJson.get_default().decode_message
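
Aside, not part of the diff: the module-level encode_message/decode_message
aliases defined just above make the common JSON round trip one call each. A
small sketch with an illustrative message type:

    from gslib.third_party.protorpc import messages, protojson

    class Note(messages.Message):
        text = messages.StringField(1, required=True)

    json_str = protojson.encode_message(Note(text=u'hello'))
    # json_str is now '{"text": "hello"}'
    note = protojson.decode_message(Note, json_str)
    assert note.text == u'hello'
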
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/third_party/protorpc/util.py Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,494 @@
+#!/usr/bin/env python
+#
+# Copyright 2010 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Common utility library."""
+
+from __future__ import with_statement
+
+__author__ = ['ra...@google.com (Rafe Kaplan)',
+ 'gu...@google.com (Guido van Rossum)',
+]
+
+import cgi
+import datetime
+import inspect
+import os
+import re
+import sys
+
+__all__ = ['AcceptItem',
+ 'AcceptError',
+ 'Error',
+ 'choose_content_type',
+ 'decode_datetime',
+ 'get_package_for_module',
+ 'pad_string',
+ 'parse_accept_header',
+ 'positional',
+ 'PROTORPC_PROJECT_URL',
+ 'TimeZoneOffset',
+]
+
+
+class Error(Exception):
+ """Base class for protorpc exceptions."""
+
+
+class AcceptError(Error):
+ """Raised when there is an error parsing the accept header."""
+
+
+PROTORPC_PROJECT_URL = 'http://code.google.com/p/google-protorpc'
+
+_TIME_ZONE_RE_STRING = r"""
+ # Examples:
+ # +01:00
+ # -05:30
+ # Z12:00
+ ((?P<z>Z) | (?P<sign>[-+])
+ (?P<hours>\d\d) :
+ (?P<minutes>\d\d))$
+"""
+_TIME_ZONE_RE = re.compile(_TIME_ZONE_RE_STRING, re.IGNORECASE | re.VERBOSE)
+
+
+def pad_string(string):
+ """Pad a string for safe HTTP error responses.
+
+ Prevents Internet Explorer from displaying their own error messages
+ when sent as the content of error responses.
+
+ Args:
+ string: A string.
+
+ Returns:
+ Formatted string left justified within a 512 byte field.
+ """
+ return string.ljust(512)
+
+
+def positional(max_positional_args):
+ """A decorator to declare that only the first N arguments may be
positional.
+
+ This decorator makes it easy to support Python 3 style keyword-only
+ parameters. For example, in Python 3 it is possible to write:
+
+ def fn(pos1, *, kwonly1=None, kwonly2=None):
+ ...
+
+ All named parameters after * must be a keyword:
+
+ fn(10, 'kw1', 'kw2') # Raises exception.
+ fn(10, kwonly1='kw1') # Ok.
+
+ Example:
+ To define a function like above, do:
+
+ @positional(1)
+ def fn(pos1, kwonly1=None, kwonly2=None):
+ ...
+
+ If no default value is provided to a keyword argument, it becomes a required
+ keyword argument:
+
+ @positional(0)
+ def fn(required_kw):
+ ...
+
+ This must be called with the keyword parameter:
+
+ fn() # Raises exception.
+ fn(10) # Raises exception.
+ fn(required_kw=10) # Ok.
+
+ When defining instance or class methods always remember to account for
+ 'self' and 'cls':
+
+ class MyClass(object):
+
+ @positional(2)
+ def my_method(self, pos1, kwonly1=None):
+ ...
+
+ @classmethod
+ @positional(2)
+ def my_method(cls, pos1, kwonly1=None):
+ ...
+
+ One can omit the argument to 'positional' altogether, and then no
+ arguments with default values may be passed positionally. This
+ would be equivalent to placing a '*' before the first argument
+ with a default value in Python 3. If there are no arguments with
+ default values, and no argument is given to 'positional', an error
+ is raised.
+
+ @positional
+ def fn(arg1, arg2, required_kw1=None, required_kw2=0):
+ ...
+
+ fn(1, 3, 5) # Raises exception.
+ fn(1, 3) # Ok.
+ fn(1, 3, required_kw1=5) # Ok.
+
+ Args:
+ max_positional_arguments: Maximum number of positional arguments. All
+ parameters after this index must be keyword only.
+
+ Returns:
+ A decorator that prevents using arguments after max_positional_args from
+ being used as positional parameters.
+
+ Raises:
+ TypeError if a keyword-only argument is provided as a positional parameter.
+ ValueError if no maximum number of arguments is provided and the function
+ has no arguments with default values.
+ """
+ def positional_decorator(wrapped):
+ def positional_wrapper(*args, **kwargs):
+ if len(args) > max_positional_args:
+ plural_s = ''
+ if max_positional_args != 1:
+ plural_s = 's'
+ raise TypeError('%s() takes at most %d positional argument%s '
+ '(%d given)' % (wrapped.__name__,
+ max_positional_args,
+ plural_s, len(args)))
+ return wrapped(*args, **kwargs)
+ return positional_wrapper
+
+ if isinstance(max_positional_args, (int, long)):
+ return positional_decorator
+ else:
+ args, _, _, defaults = inspect.getargspec(max_positional_args)
+ if defaults is None:
+ raise ValueError(
+ 'Functions with no keyword arguments must specify '
+ 'max_positional_args')
+ return positional(len(args) - len(defaults))(max_positional_args)
+
+
+# TODO(rafek): Support 'level' from the Accept header standard.
+class AcceptItem(object):
+ """Encapsulate a single entry of an Accept header.
+
+ Parses and extracts relevant values from an Accept header and implements
+ a sort order based on the priority of each requested type as defined
+ here:
+
+ http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html
+
+ Accept headers are normally a list of comma separated items. Each item
+ has the format of a normal HTTP header. For example:
+
+ Accept: text/plain, text/html, text/*, */*
+
+ This header means to prefer plain text over HTML, HTML over any other
+ kind of text and text over any other kind of supported format.
+
+ This class does not attempt to parse the list of items from the Accept header.
+ The constructor expects the unparsed sub-header and the index within the
+ Accept header at which the fragment was found.
+
+ Properties:
+ index: The index at which this accept item was found in the Accept header.
+ main_type: The main type of the content type.
+ sub_type: The sub type of the content type.
+ q: The q value extracted from the header as a float. If there is no q
+ value, defaults to 1.0.
+ values: All header attributes parsed from the sub-header.
+ sort_key: A tuple (no_main_type, no_sub_type, q, no_values, index):
+ no_main_type: */* has the least priority.
+ no_sub_type: Items with no sub-type have less priority.
+ q: Items with lower q value have less priority.
+ no_values: Items with no values have less priority.
+ index: Index of item in accept header is the last priority.
+ """
+
+ __CONTENT_TYPE_REGEX = re.compile(r'^([^/]+)/([^/]+)$')
+
+ def __init__(self, accept_header, index):
+ """Parse component of an Accept header.
+
+ Args:
+ accept_header: Unparsed sub-expression of accept header.
+ index: The index at which this accept item was found in the Accept header.
+ """
+ accept_header = accept_header.lower()
+ content_type, values = cgi.parse_header(accept_header)
+ match = self.__CONTENT_TYPE_REGEX.match(content_type)
+ if not match:
+ raise AcceptError('Not valid Accept header: %s' % accept_header)
+ self.__index = index
+ self.__main_type = match.group(1)
+ self.__sub_type = match.group(2)
+ self.__q = float(values.get('q', 1))
+ self.__values = values
+
+ if self.__main_type == '*':
+ self.__main_type = None
+
+ if self.__sub_type == '*':
+ self.__sub_type = None
+
+ self.__sort_key = (not self.__main_type,
+ not self.__sub_type,
+ -self.__q,
+ not self.__values,
+ self.__index)
+
+ @property
+ def index(self):
+ return self.__index
+
+ @property
+ def main_type(self):
+ return self.__main_type
+
+ @property
+ def sub_type(self):
+ return self.__sub_type
+
+ @property
+ def q(self):
+ return self.__q
+
+ @property
+ def values(self):
+ """Copy the dictionary of values parsed from the header fragment."""
+ return dict(self.__values)
+
+ @property
+ def sort_key(self):
+ return self.__sort_key
+
+ def match(self, content_type):
+ """Determine if the given accept header matches content type.
+
+ Args:
+ content_type: Unparsed content type string.
+
+ Returns:
+ True if accept header matches content type, else False.
+ """
+ content_type, _ = cgi.parse_header(content_type)
+ match = self.__CONTENT_TYPE_REGEX.match(content_type.lower())
+ if not match:
+ return False
+
+ main_type, sub_type = match.group(1), match.group(2)
+ if not(main_type and sub_type):
+ return False
+
+ return ((self.__main_type is None or self.__main_type == main_type) and
+ (self.__sub_type is None or self.__sub_type == sub_type))
+
+
+ def __cmp__(self, other):
+ """Comparison operator based on sort keys."""
+ if not isinstance(other, AcceptItem):
+ return NotImplemented
+ return cmp(self.sort_key, other.sort_key)
+
+ def __str__(self):
+ """Rebuilds Accept header."""
+ content_type = '%s/%s' % (self.__main_type or '*', self.__sub_type or '*')
+ values = self.values
+
+ if values:
+ value_strings = ['%s=%s' % (i, v) for i, v in values.iteritems()]
+ return '%s; %s' % (content_type, '; '.join(value_strings))
+ else:
+ return content_type
+
+ def __repr__(self):
+ return 'AcceptItem(%r, %d)' % (str(self), self.__index)
+
+
+def parse_accept_header(accept_header):
+ """Parse accept header.
+
+ Args:
+ accept_header: Unparsed accept header. Does not include name of header.
+
+ Returns:
+ List of AcceptItem instances sorted according to their priority.
+ """
+ accept_items = []
+ for index, header in enumerate(accept_header.split(',')):
+ accept_items.append(AcceptItem(header, index))
+ return sorted(accept_items)
+
+
+def choose_content_type(accept_header, supported_types):
+ """Choose most appropriate supported type based on what client accepts.
+
+ Args:
+ accept_header: Unparsed accept header. Does not include name of header.
+ supported_types: List of content-types supported by the server. The index
+ of the supported types determines which supported type is preferred by
+ the server should the accept header match more than one at the same
+ priority.
+
+ Returns:
+ The preferred supported type if the accept header matches any, else None.
+ """
+ for accept_item in parse_accept_header(accept_header):
+ for supported_type in supported_types:
+ if accept_item.match(supported_type):
+ return supported_type
+ return None
+
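
A quick sketch of the Accept-header helpers above (the header and supported
list are made up for the example, and the gsutil checkout is assumed to be
importable):

  from gslib.third_party.protorpc import util

  header = 'text/*;q=0.3, text/html;q=0.7, */*;q=0.1'
  supported = ['application/json', 'text/html', 'text/plain']

  # Items are tried from most to least preferred; text/html wins on q value
  # and matches a supported type exactly.
  print util.choose_content_type(header, supported)   # text/html

  # parse_accept_header exposes the same ordering directly.
  for item in util.parse_accept_header(header):
    print item
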
+
+@positional(1)
+def get_package_for_module(module):
+ """Get package name for a module.
+
+ Helper calculates the package name of a module.
+
+ Args:
+ module: Module to get name for. If module is a string, try to find
+ module in sys.modules.
+
+ Returns:
+ If module contains 'package' attribute, uses that as package name.
+ Else, if module is not the '__main__' module, the module __name__.
+ Else, the base name of the module file name. Else None.
+ """
+ if isinstance(module, basestring):
+ try:
+ module = sys.modules[module]
+ except KeyError:
+ return None
+
+ try:
+ return unicode(module.package)
+ except AttributeError:
+ if module.__name__ == '__main__':
+ try:
+ file_name = module.__file__
+ except AttributeError:
+ pass
+ else:
+ base_name = os.path.basename(file_name)
+ split_name = os.path.splitext(base_name)
+ if len(split_name) == 1:
+ return unicode(base_name)
+ else:
+ return u'.'.join(split_name[:-1])
+
+ return unicode(module.__name__)
+
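
A small sketch of get_package_for_module; the module and package names here
are arbitrary and the gsutil checkout is assumed importable:

  import json
  import sys

  from gslib.third_party.protorpc import util

  # A plain imported module falls through to its __name__.
  print util.get_package_for_module('json')        # json

  # An explicit 'package' attribute, when present, takes precedence.
  this_module = sys.modules[__name__]
  this_module.package = 'example.package'
  print util.get_package_for_module(this_module)   # example.package
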
+
+class TimeZoneOffset(datetime.tzinfo):
+ """Time zone information as encoded/decoded for DateTimeFields."""
+
+ def __init__(self, offset):
+ """Initialize a time zone offset.
+
+ Args:
+ offset: Integer or timedelta time zone offset, in minutes from UTC. This
+ can be negative.
+ """
+ super(TimeZoneOffset, self).__init__()
+ if isinstance(offset, datetime.timedelta):
+ offset = timedelta_totalseconds(offset) / 60
+ self.__offset = offset
+
+ def utcoffset(self, dt):
+ """Get the a timedelta with the time zone's offset from UTC.
+
+ Returns:
+ The time zone offset from UTC, as a timedelta.
+ """
+ return datetime.timedelta(minutes=self.__offset)
+
+ def dst(self, dt):
+ """Get the daylight savings time offset.
+
+ The formats that ProtoRPC uses to encode/decode time zone information don't
+ contain any information about daylight savings time. So this always
+ returns a timedelta of 0.
+
+ Returns:
+ A timedelta of 0.
+ """
+ return datetime.timedelta(0)
+
+
+def decode_datetime(encoded_datetime):
+ """Decode a DateTimeField parameter from a string to a python datetime.
+
+ Args:
+ encoded_datetime: A string in RFC 3339 format.
+
+ Returns:
+ A datetime object with the date and time specified in encoded_datetime.
+
+ Raises:
+ ValueError: If the string is not in a recognized format.
+ """
+ # Check if the string includes a time zone offset. Break out the
+ # part that doesn't include time zone info. Convert to uppercase
+ # because all our comparisons should be case-insensitive.
+ time_zone_match = _TIME_ZONE_RE.search(encoded_datetime)
+ if time_zone_match:
+ time_string = encoded_datetime[:time_zone_match.start(1)].upper()
+ else:
+ time_string = encoded_datetime.upper()
+
+ if '.' in time_string:
+ format_string = '%Y-%m-%dT%H:%M:%S.%f'
+ else:
+ format_string = '%Y-%m-%dT%H:%M:%S'
+
+ decoded_datetime = datetime.datetime.strptime(time_string, format_string)
+
+ if not time_zone_match:
+ return decoded_datetime
+
+ # Time zone info was included in the parameter. Add a tzinfo
+ # object to the datetime. Datetimes can't be changed after they're
+ # created, so we'll need to create a new one.
+ if time_zone_match.group('z'):
+ offset_minutes = 0
+ else:
+ sign = time_zone_match.group('sign')
+ hours, minutes = [int(value) for value in
+ time_zone_match.group('hours', 'minutes')]
+ offset_minutes = hours * 60 + minutes
+ if sign == '-':
+ offset_minutes *= -1
+
+ return datetime.datetime(decoded_datetime.year,
+ decoded_datetime.month,
+ decoded_datetime.day,
+ decoded_datetime.hour,
+ decoded_datetime.minute,
+ decoded_datetime.second,
+ decoded_datetime.microsecond,
+ TimeZoneOffset(offset_minutes))
+
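
A quick sketch of decode_datetime and TimeZoneOffset together (the timestamps
are arbitrary, and an RFC 3339 style offset is assumed):

  from gslib.third_party.protorpc import util

  # Without zone information a naive datetime is returned.
  print util.decode_datetime('2014-11-03T12:38:28')

  # An explicit offset comes back attached as a TimeZoneOffset tzinfo.
  dt = util.decode_datetime('2014-11-03T12:38:28.123456-08:00')
  print dt.isoformat()    # 2014-11-03T12:38:28.123456-08:00
  print dt.utcoffset()    # -1 day, 16:00:00 (a -8 hour offset)
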
+# TODO: This function was added to the existing library
+# (which is otherwise not python 2.6-compatible). If we move this to a
+# submodule we will need to fix it to include this change.
+def timedelta_totalseconds(delta):
+ # python2.6 does not have timedelta.total_seconds() so we have
+ # to calculate this ourselves. This is straight from the
+ # datetime docs.
+ return (
+ (delta.microseconds + (delta.seconds + delta.days * 24 * 3600)
+ * 10**6) / 10**6)
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/third_party/storage_apitools/__init__.py Mon
Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,21 @@
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from gslib.third_party.storage_apitools.base_api import *
+from gslib.third_party.storage_apitools.credentials_lib import *
+from gslib.third_party.storage_apitools.encoding import *
+from gslib.third_party.storage_apitools.exceptions import *
+from gslib.third_party.storage_apitools.extra_types import *
+from gslib.third_party.storage_apitools.http_wrapper import *
+from gslib.third_party.storage_apitools.transfer import *
+from gslib.third_party.storage_apitools.util import *
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/third_party/storage_apitools/base_api.py Mon
Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,596 @@
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Base class for api services."""
+
+import contextlib
+import httplib
+import logging
+import pprint
+import types
+import urllib
+import urlparse
+
+
+from gslib.third_party.protorpc import message_types
+from gslib.third_party.protorpc import messages
+
+from gslib.third_party.storage_apitools import credentials_lib
+from gslib.third_party.storage_apitools import encoding
+from gslib.third_party.storage_apitools import exceptions
+from gslib.third_party.storage_apitools import http_wrapper
+from gslib.third_party.storage_apitools import util
+
+__all__ = [
+ 'ApiMethodInfo',
+ 'ApiUploadInfo',
+ 'BaseApiClient',
+ 'BaseApiService',
+ 'NormalizeApiEndpoint',
+]
+
+# TODO: Remove this once we quiet the spurious logging in
+# oauth2client (or drop oauth2client).
+logging.getLogger('oauth2client.util').setLevel(logging.ERROR)
+
+_MAX_URL_LENGTH = 2048
+
+
+class ApiUploadInfo(messages.Message):
+ """Media upload information for a method.
+
+ Fields:
+ accept: (repeated) MIME Media Ranges for acceptable media uploads
+ to this method.
+ max_size: (integer) Maximum size of a media upload, such as 3MB
+ or 1TB (converted to an integer).
+ resumable_path: Path to use for resumable uploads.
+ resumable_multipart: (boolean) Whether or not the resumable endpoint
+ supports multipart uploads.
+ simple_path: Path to use for simple uploads.
+ simple_multipart: (boolean) Whether or not the simple endpoint
+ supports multipart uploads.
+ """
+ accept = messages.StringField(1, repeated=True)
+ max_size = messages.IntegerField(2)
+ resumable_path = messages.StringField(3)
+ resumable_multipart = messages.BooleanField(4)
+ simple_path = messages.StringField(5)
+ simple_multipart = messages.BooleanField(6)
+
+
+class ApiMethodInfo(messages.Message):
+ """Configuration info for an API method.
+
+ All fields are strings unless noted otherwise.
+
+ Fields:
+ relative_path: Relative path for this method.
+ method_id: ID for this method.
+ http_method: HTTP verb to use for this method.
+ path_params: (repeated) path parameters for this method.
+ query_params: (repeated) query parameters for this method.
+ ordered_params: (repeated) ordered list of parameters for
+ this method.
+ description: description of this method.
+ request_type_name: name of the request type.
+ response_type_name: name of the response type.
+ request_field: if not null, the field to pass as the body
+ of this POST request. may also be the REQUEST_IS_BODY
+ value below to indicate the whole message is the body.
+ upload_config: (ApiUploadInfo) Information about the upload
+ configuration supported by this method.
+ supports_download: (boolean) If True, this method supports
+ downloading the request via the `alt=media` query
+ parameter.
+ """
+
+ relative_path = messages.StringField(1)
+ method_id = messages.StringField(2)
+ http_method = messages.StringField(3)
+ path_params = messages.StringField(4, repeated=True)
+ query_params = messages.StringField(5, repeated=True)
+ ordered_params = messages.StringField(6, repeated=True)
+ description = messages.StringField(7)
+ request_type_name = messages.StringField(8)
+ response_type_name = messages.StringField(9)
+ request_field = messages.StringField(10, default='')
+ upload_config = messages.MessageField(ApiUploadInfo, 11)
+ supports_download = messages.BooleanField(12, default=False)
+REQUEST_IS_BODY = '<request>'
+
+
+def _LoadClass(name, messages_module):
+ if name.startswith('message_types.'):
+ _, _, classname = name.partition('.')
+ return getattr(message_types, classname)
+ elif '.' not in name:
+ return getattr(messages_module, name)
+ else:
+ raise exceptions.GeneratedClientError('Unknown class %s' % name)
+
+
+def _RequireClassAttrs(obj, attrs):
+ for attr in attrs:
+ attr_name = attr.upper()
+ if not hasattr(obj, '%s' % attr_name) or not getattr(obj, attr_name):
+ msg = 'No %s specified for object of class %s.' % (
+ attr_name, type(obj).__name__)
+ raise exceptions.GeneratedClientError(msg)
+
+
+def NormalizeApiEndpoint(api_endpoint):
+ if not api_endpoint.endswith('/'):
+ api_endpoint += '/'
+ return api_endpoint
+
+
+class _UrlBuilder(object):
+ """Convenient container for url data."""
+
+ def __init__(self, base_url, relative_path=None, query_params=None):
+ components = urlparse.urlsplit(urlparse.urljoin(
+ base_url, relative_path or ''))
+ if components.fragment:
+ raise exceptions.ConfigurationValueError(
+ 'Unexpected url fragment: %s' % components.fragment)
+ self.query_params = urlparse.parse_qs(components.query or '')
+ if query_params is not None:
+ self.query_params.update(query_params)
+ self.__scheme = components.scheme
+ self.__netloc = components.netloc
+ self.relative_path = components.path
+
+ @classmethod
+ def FromUrl(cls, url):
+ urlparts = urlparse.urlsplit(url)
+ query_params = urlparse.parse_qs(urlparts.query)
+ base_url = urlparse.urlunsplit((
+ urlparts.scheme, urlparts.netloc, '', None, None))
+ relative_path = urlparts.path
+ return cls(base_url, relative_path=relative_path, query_params=query_params)
+
+ @property
+ def base_url(self):
+ return urlparse.urlunsplit((self.__scheme, self.__netloc, '', '', ''))
+
+ @base_url.setter
+ def base_url(self, value):
+ components = urlparse.urlsplit(value)
+ if components.path or components.query or components.fragment:
+ raise exceptions.ConfigurationValueError('Invalid base url: %s' % value)
+ self.__scheme = components.scheme
+ self.__netloc = components.netloc
+
+ @property
+ def query(self):
+ # TODO: In the case that some of the query params are
+ # non-ASCII, we may silently fail to encode correctly. We should
+ # figure out who is responsible for owning the object -> str
+ # conversion.
+ return urllib.urlencode(self.query_params, doseq=True)
+
+ @property
+ def url(self):
+ if '{' in self.relative_path or '}' in self.relative_path:
+ raise exceptions.ConfigurationValueError(
+ 'Cannot create url with relative path %s' % self.relative_path)
+ return urlparse.urlunsplit((
+ self.__scheme, self.__netloc, self.relative_path, self.query, ''))
+
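
_UrlBuilder is internal to this module, but a short sketch makes the URL
assembly easier to follow (the endpoint, bucket name and prefix below are
placeholders):

  from gslib.third_party.storage_apitools import base_api

  builder = base_api._UrlBuilder(
      'https://www.googleapis.com/storage/v1/',
      relative_path='b/{bucket}/o',
      query_params={'prefix': ['photos/']})

  # Path templates must be expanded before .url is read, otherwise a
  # ConfigurationValueError is raised.
  builder.relative_path = builder.relative_path.replace('{bucket}', 'my-bucket')
  print builder.url
  # https://www.googleapis.com/storage/v1/b/my-bucket/o?prefix=photos%2F
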
+
+class BaseApiClient(object):
+ """Base class for client libraries."""
+ MESSAGES_MODULE = None
+
+ _API_KEY = ''
+ _CLIENT_ID = ''
+ _CLIENT_SECRET = ''
+ _PACKAGE = ''
+ _SCOPES = []
+ _USER_AGENT = ''
+
+ def __init__(self, url, credentials=None, get_credentials=True, http=None,
+ model=None, log_request=False, log_response=False, num_retries=5,
+ credentials_args=None, default_global_params=None):
+ _RequireClassAttrs(self, (
+ '_package', '_scopes', '_client_id', '_client_secret',
+ 'messages_module'))
+ if default_global_params is not None:
+ util.Typecheck(default_global_params, self.params_type)
+ self.__default_global_params = default_global_params
+ self.log_request = log_request
+ self.log_response = log_response
+ self.__num_retries = 5
+ # We let the @property machinery below do our validation.
+ self.num_retries = num_retries
+ self._url = url
+ self._credentials = credentials
+ if get_credentials and not credentials:
+ credentials_args = credentials_args or {}
+ self._SetCredentials(**credentials_args)
+ self._http = http or http_wrapper.GetHttp()
+ # Note that "no credentials" is totally possible.
+ if self._credentials is not None:
+ self._http = self._credentials.authorize(self._http)
+ # TODO: Remove this field when we switch to proto2.
+ self.__include_fields = None
+
+ # TODO: Finish deprecating these fields.
+ _ = model
+
+ def _SetCredentials(self, **kwds):
+ """Fetch credentials, and set them for this client.
+
+ Note that we can't simply return credentials, since creating them
+ may involve side-effecting self.
+
+ Args:
+ **kwds: Additional keyword arguments are passed on to GetCredentials.
+
+ Returns:
+ None. Sets self._credentials.
+ """
+ args = {
+ 'api_key': self._API_KEY,
+ 'client': self,
+ 'client_id': self._CLIENT_ID,
+ 'client_secret': self._CLIENT_SECRET,
+ 'package_name': self._PACKAGE,
+ 'scopes': self._SCOPES,
+ 'user_agent': self._USER_AGENT,
+ }
+ args.update(kwds)
+ # TODO: It's a bit dangerous to pass this
+ # still-half-initialized self into this method, but we might need
+ # to set attributes on it associated with our credentials.
+ # Consider another way around this (maybe a callback?) and whether
+ # or not it's worth it.
+ self._credentials = credentials_lib.GetCredentials(**args)
+
+ @classmethod
+ def ClientInfo(cls):
+ return {
+ 'client_id': cls._CLIENT_ID,
+ 'client_secret': cls._CLIENT_SECRET,
+ 'scope': ' '.join(sorted(util.NormalizeScopes(cls._SCOPES))),
+ 'user_agent': cls._USER_AGENT,
+ }
+
+ @property
+ def base_model_class(self):
+ return None
+
+ @property
+ def http(self):
+ return self._http
+
+ @property
+ def url(self):
+ return self._url
+
+ @classmethod
+ def GetScopes(cls):
+ return cls._SCOPES
+
+ @property
+ def params_type(self):
+ return _LoadClass('StandardQueryParameters', self.MESSAGES_MODULE)
+
+ @property
+ def user_agent(self):
+ return self._USER_AGENT
+
+ @property
+ def _default_global_params(self):
+ if self.__default_global_params is None:
+ self.__default_global_params = self.params_type()
+ return self.__default_global_params
+
+ def AddGlobalParam(self, name, value):
+ params = self._default_global_params
+ setattr(params, name, value)
+
+ @property
+ def global_params(self):
+ return encoding.CopyProtoMessage(self._default_global_params)
+
+ @contextlib.contextmanager
+ def IncludeFields(self, include_fields):
+ self.__include_fields = include_fields
+ yield
+ self.__include_fields = None
+
+ @property
+ def num_retries(self):
+ return self.__num_retries
+
+ @num_retries.setter
+ def num_retries(self, value):
+ util.Typecheck(value, (int, long))
+ if value < 0:
+ raise exceptions.InvalidDataError(
+ 'Cannot have negative value for num_retries')
+ self.__num_retries = value
+
+ @contextlib.contextmanager
+ def WithRetries(self, num_retries):
+ old_num_retries = self.num_retries
+ self.num_retries = num_retries
+ yield
+ self.num_retries = old_num_retries
+
+ def ProcessRequest(self, method_config, request):
+ """Hook for pre-processing of requests."""
+ if self.log_request:
+ logging.info(
+ 'Calling method %s with %s: %s', method_config.method_id,
+ method_config.request_type_name, request)
+ return request
+
+ def ProcessHttpRequest(self, http_request):
+ """Hook for pre-processing of http requests."""
+ if self.log_request:
+ logging.info('Making http %s to %s',
+ http_request.http_method, http_request.url)
+ logging.info('Headers: %s', pprint.pformat(http_request.headers))
+ if http_request.body:
+ # TODO: Make this safe to print in the case of
+ # non-printable body characters.
+ logging.info('Body:\n%s',
+ http_request.loggable_body or http_request.body)
+ else:
+ logging.info('Body: (none)')
+ return http_request
+
+ def ProcessResponse(self, method_config, response):
+ if self.log_response:
+ logging.info('Response of type %s: %s',
+ method_config.response_type_name, response)
+ return response
+
+ # TODO: Decide where these two functions should live.
+ def SerializeMessage(self, message):
+ return encoding.MessageToJson(message, include_fields=self.__include_fields)
+
+ def DeserializeMessage(self, response_type, data):
+ """Deserialize the given data as method_config.response_type."""
+ try:
+ message = encoding.JsonToMessage(response_type, data)
+ except (exceptions.InvalidDataFromServerError,
+ messages.ValidationError) as e:
+ raise exceptions.InvalidDataFromServerError(
+ 'Error decoding response "%s" as type %s: %s' % (
+ data, response_type.__name__, e))
+ return message
+
+ def FinalizeTransferUrl(self, url):
+ """Modify the url for a given transfer, based on auth and version."""
+ url_builder = _UrlBuilder.FromUrl(url)
+ if self.global_params.key:
+ url_builder.query_params['key'] = self.global_params.key
+ return url_builder.url
+
+
+class BaseApiService(object):
+ """Base class for generated API services."""
+
+ def __init__(self, client):
+ self.__client = client
+ self._method_configs = {}
+ self._upload_configs = {}
+
+ @property
+ def _client(self):
+ return self.__client
+
+ def GetMethodConfig(self, method):
+ return self._method_configs[method]
+
+ def GetUploadConfig(self, method):
+ return self._upload_configs.get(method)
+
+ def GetRequestType(self, method):
+ method_config = self.GetMethodConfig(method)
+ return getattr(self._client.MESSAGES_MODULE,
+ method_config.request_type_name)
+
+ def GetResponseType(self, method):
+ method_config = self.GetMethodConfig(method)
+ return getattr(self._client.MESSAGES_MODULE,
+ method_config.response_type_name)
+
+ def __CombineGlobalParams(self, global_params, default_params):
+ util.Typecheck(global_params, (types.NoneType, self.__client.params_type))
+ result = self.__client.params_type()
+ global_params = global_params or self.__client.params_type()
+ for field in result.all_fields():
+ value = (global_params.get_assigned_value(field.name) or
+ default_params.get_assigned_value(field.name))
+ if value not in (None, [], ()):
+ setattr(result, field.name, value)
+ return result
+
+ def __ConstructQueryParams(self, query_params, request, global_params):
+ """Construct a dictionary of query parameters for this request."""
+ global_params = self.__CombineGlobalParams(
+ global_params, self.__client.global_params)
+ query_info = dict((field.name, getattr(global_params, field.name))
+ for field in self.__client.params_type.all_fields())
+ query_info.update(
+ (param, getattr(request, param, None)) for param in query_params)
+ query_info = dict((k, v) for k, v in query_info.iteritems()
+ if v is not None)
+ for k, v in query_info.iteritems():
+ if isinstance(v, unicode):
+ query_info[k] = v.encode('utf8')
+ elif isinstance(v, str):
+ query_info[k] = v.decode('utf8')
+ return query_info
+
+ def __ConstructRelativePath(self, method_config, request, relative_path=None):
+ """Determine the relative path for request."""
+ path = relative_path or method_config.relative_path
+ path = path.replace('+', '')
+ for param in method_config.path_params:
+ param_template = '{%s}' % param
+ if param_template not in path:
+ raise exceptions.InvalidUserInputError(
+ 'Missing path parameter %s' % param)
+ try:
+ # TODO: Do we want to support some sophisticated
+ # mapping here?
+ value = getattr(request, param)
+ except AttributeError:
+ raise exceptions.InvalidUserInputError(
+ 'Request missing required parameter %s' % param)
+ if value is None:
+ raise exceptions.InvalidUserInputError(
+ 'Request missing required parameter %s' % param)
+ try:
+ if not isinstance(value, basestring):
+ value = str(value)
+ path = path.replace(param_template,
+ urllib.quote(value.encode('utf_8'), ''))
+ except TypeError as e:
+ raise exceptions.InvalidUserInputError(
+ 'Error setting required parameter %s to value %s: %s' % (
+ param, value, e))
+ return path
+
+ def __FinalizeRequest(self, http_request, url_builder):
+ """Make any final general adjustments to the request."""
+ if (http_request.http_method == 'GET' and
+ len(http_request.url) > _MAX_URL_LENGTH):
+ http_request.http_method = 'POST'
+ http_request.headers['x-http-method-override'] = 'GET'
+ http_request.headers['content-type'] = 'application/x-www-form-urlencoded'
+ http_request.body = url_builder.query
+ url_builder.query_params = {}
+ http_request.url = url_builder.url
+
+ def __ProcessHttpResponse(self, method_config, http_response):
+ """Process the given http response."""
+ if http_response.status_code not in (httplib.OK, httplib.NO_CONTENT):
+ raise exceptions.HttpError.FromResponse(http_response)
+ if http_response.status_code == httplib.NO_CONTENT:
+ # TODO: Find out why _replace doesn't seem to work here.
+ http_response = http_wrapper.Response(
+ info=http_response.info, content='{}',
+ request_url=http_response.request_url)
+ response_type = _LoadClass(
+ method_config.response_type_name, self.__client.MESSAGES_MODULE)
+ return self.__client.DeserializeMessage(
+ response_type, http_response.content)
+
+ def __SetBaseHeaders(self, http_request, client):
+ """Fill in the basic headers on http_request."""
+ # TODO: Make the default a little better here, and
+ # include the apitools version.
+ user_agent = client.user_agent or 'apitools-client/1.0'
+ http_request.headers['user-agent'] = user_agent
+ http_request.headers['accept'] = 'application/json'
+ http_request.headers['accept-encoding'] = 'gzip, deflate'
+
+ def __SetBody(self, http_request, method_config, request, upload):
+ """Fill in the body on http_request."""
+ if not method_config.request_field:
+ return
+
+ request_type = _LoadClass(
+ method_config.request_type_name, self.__client.MESSAGES_MODULE)
+ if method_config.request_field == REQUEST_IS_BODY:
+ body_value = request
+ body_type = request_type
+ else:
+ body_value = getattr(request, method_config.request_field)
+ body_field = request_type.field_by_name(method_config.request_field)
+ util.Typecheck(body_field, messages.MessageField)
+ body_type = body_field.type
+
+ if upload and not body_value:
+ # We're going to fill in the body later.
+ return
+ util.Typecheck(body_value, body_type)
+ http_request.headers['content-type'] = 'application/json'
+ http_request.body = self.__client.SerializeMessage(body_value)
+
+ def PrepareHttpRequest(self, method_config, request, global_params=None,
+ upload=None, upload_config=None, download=None):
+ """Prepares an HTTP request to be sent."""
+ request_type = _LoadClass(
+ method_config.request_type_name, self.__client.MESSAGES_MODULE)
+ util.Typecheck(request, request_type)
+ request = self.__client.ProcessRequest(method_config, request)
+
+ http_request = http_wrapper.Request(http_method=method_config.http_method)
+ self.__SetBaseHeaders(http_request, self.__client)
+ self.__SetBody(http_request, method_config, request, upload)
+
+ url_builder = _UrlBuilder(
+ self.__client.url, relative_path=method_config.relative_path)
+ url_builder.query_params = self.__ConstructQueryParams(
+ method_config.query_params, request, global_params)
+
+ # It's important that upload and download go before we fill in the
+ # relative path, so that they can replace it.
+ if upload is not None:
+ upload.ConfigureRequest(upload_config, http_request, url_builder)
+ if download is not None:
+ download.ConfigureRequest(http_request, url_builder)
+
+ url_builder.relative_path = self.__ConstructRelativePath(
+ method_config, request, relative_path=url_builder.relative_path)
+ self.__FinalizeRequest(http_request, url_builder)
+
+ return self.__client.ProcessHttpRequest(http_request)
+
+ def _RunMethod(self, method_config, request, global_params=None,
+ upload=None, upload_config=None, download=None):
+ """Call this method with request."""
+ if upload is not None and download is not None:
+ # TODO: This just involves refactoring the logic
+ # below into callbacks that we can pass around; in particular,
+ # the order should be that the upload gets the initial request,
+ # and then passes its reply to a download if one exists, and
+ # then that goes to ProcessResponse and is returned.
+ raise exceptions.NotYetImplementedError(
+ 'Cannot yet use both upload and download at once')
+
+ http_request = self.PrepareHttpRequest(
+ method_config, request, global_params, upload, upload_config, download)
+
+ # TODO: Make num_retries customizable on Transfer
+ # objects, and pass in self.__client.num_retries when initializing
+ # an upload or download.
+ if download is not None:
+ download.InitializeDownload(http_request, client=self._client)
+ return
+
+ http_response = None
+ if upload is not None:
+ http_response = upload.InitializeUpload(http_request, client=self._client)
+ if http_response is None:
+ http = self.__client.http
+ if upload and upload.bytes_http:
+ http = upload.bytes_http
+ http_response = http_wrapper.MakeRequest(
+ http, http_request, retries=self.__client.num_retries)
+
+ return self.ProcessHttpResponse(method_config, http_response)
+
+ def ProcessHttpResponse(self, method_config, http_response):
+ """Convert an HTTP response to the expected message type."""
+ return self.__client.ProcessResponse(
+ method_config,
+ self.__ProcessHttpResponse(method_config, http_response))
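
To see how BaseApiClient and BaseApiService are meant to be used, here is a
rough sketch of what a generated service looks like; the service, method and
message-type names are hypothetical (the real generated GCS client added by
this change lives in storage_v1_client.py):

  from gslib.third_party.storage_apitools import base_api

  class FrobsService(base_api.BaseApiService):
    """Hypothetical generated service exposing a single 'frobs.get' method."""

    def __init__(self, client):
      super(FrobsService, self).__init__(client)
      self._method_configs = {
          'Get': base_api.ApiMethodInfo(
              relative_path='frobs/{frobId}',
              method_id='example.frobs.get',
              http_method='GET',
              path_params=['frobId'],
              query_params=[],
              ordered_params=['frobId'],
              request_type_name='FrobsGetRequest',
              response_type_name='Frob'),
      }

    def Get(self, request, global_params=None):
      # PrepareHttpRequest, MakeRequest and ProcessHttpResponse all happen
      # inside _RunMethod.
      return self._RunMethod(
          self.GetMethodConfig('Get'), request, global_params=global_params)
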
=======================================
--- /dev/null
+++
/third_party/gsutil/gslib/third_party/storage_apitools/credentials_lib.py
Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,259 @@
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Common credentials classes and constructors."""
+
+import json
+import os
+import urllib2
+
+
+import httplib2
+import oauth2client.client
+import oauth2client.gce
+import oauth2client.multistore_file
+
+from gslib.third_party.storage_apitools import exceptions
+from gslib.third_party.storage_apitools import util
+
+__all__ = [
+ 'CredentialsFromFile',
+ 'GaeAssertionCredentials',
+ 'GceAssertionCredentials',
+ 'GetCredentials',
+ 'ServiceAccountCredentials',
+ 'ServiceAccountCredentialsFromFile',
+ ]
+
+
+# TODO: Expose the extra args here somewhere higher up,
+# possibly as flags in the generated CLI.
+def GetCredentials(package_name, scopes, client_id, client_secret, user_agent,
+ credentials_filename=None,
+ service_account_name=None, service_account_keyfile=None,
+ api_key=None, client=None):
+ """Attempt to get credentials, using an oauth dance as the last
resort."""
+ scopes = util.NormalizeScopes(scopes)
+ # TODO: Error checking.
+ client_info = {
+ 'client_id': client_id,
+ 'client_secret': client_secret,
+ 'scope': ' '.join(sorted(util.NormalizeScopes(scopes))),
+ 'user_agent': user_agent or '%s-generated/0.1' % package_name,
+ }
+ if service_account_name is not None:
+ credentials = ServiceAccountCredentialsFromFile(
+ service_account_name, service_account_keyfile, scopes)
+ if credentials is not None:
+ return credentials
+ credentials = GaeAssertionCredentials.Get(scopes)
+ if credentials is not None:
+ return credentials
+ credentials = GceAssertionCredentials.Get(scopes)
+ if credentials is not None:
+ return credentials
+ credentials_filename = credentials_filename or os.path.expanduser(
+ '~/.apitools.token')
+ credentials = CredentialsFromFile(credentials_filename, client_info)
+ if credentials is not None:
+ return credentials
+ raise exceptions.CredentialsError('Could not create valid credentials')
+
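
An illustrative call showing the fallback order encoded above: explicit
service account, then App Engine, then the GCE metadata server, then a cached
token file, and finally the interactive OAuth flow. The ids, scope and token
path below are placeholders:

  import os

  from gslib.third_party.storage_apitools import credentials_lib

  credentials = credentials_lib.GetCredentials(
      package_name='example-app',
      scopes=['https://www.googleapis.com/auth/devstorage.read_only'],
      client_id='REPLACE_WITH_CLIENT_ID',
      client_secret='REPLACE_WITH_CLIENT_SECRET',
      user_agent='example-app/0.1',
      credentials_filename=os.path.expanduser('~/.example.token'))
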
+
+def ServiceAccountCredentialsFromFile(
+ service_account_name, private_key_filename, scopes):
+ with open(private_key_filename) as key_file:
+ return ServiceAccountCredentials(
+ service_account_name, key_file.read(), scopes)
+
+
+def ServiceAccountCredentials(service_account_name, private_key, scopes):
+ scopes = util.NormalizeScopes(scopes)
+ return oauth2client.client.SignedJwtAssertionCredentials(
+ service_account_name, private_key, scopes)
+
+
+# TODO: We override to add some utility code, and to
+# update the old refresh implementation. Either push this code into
+# oauth2client or drop oauth2client.
+class GceAssertionCredentials(oauth2client.gce.AppAssertionCredentials):
+ """Assertion credentials for GCE instances."""
+
+ def __init__(self, scopes=None, service_account_name='default', **kwds):
+ """Initializes the credentials instance.
+
+ Args:
+ scopes: The scopes to get. If None, whatever scopes are available
+ to the instance are used.
+ service_account_name: The service account to retrieve the scopes from.
+ **kwds: Additional keyword args.
+ """
+ if not util.DetectGce():
+ raise exceptions.ResourceUnavailableError(
+ 'GCE credentials requested outside a GCE instance')
+ if not self.GetServiceAccount(service_account_name):
+ raise exceptions.ResourceUnavailableError(
+ 'GCE credentials requested but service account %s does not exist.' %
+ service_account_name)
+ self.__service_account_name = service_account_name
+ if scopes:
+ scope_ls = util.NormalizeScopes(scopes)
+ instance_scopes = self.GetInstanceScopes()
+ if scope_ls > instance_scopes:
+ raise exceptions.CredentialsError(
+ 'Instance did not have access to scopes %s' % (
+ sorted(list(scope_ls - instance_scopes)),))
+ else:
+ scopes = self.GetInstanceScopes()
+ super(GceAssertionCredentials, self).__init__(scopes, **kwds)
+
+ @classmethod
+ def Get(cls, *args, **kwds):
+ try:
+ return cls(*args, **kwds)
+ except exceptions.Error:
+ return None
+
+ def GetServiceAccount(self, account):
+ account_uri = (
+ 'http://metadata.google.internal/computeMetadata/'
+ 'v1/instance/service-accounts')
+ additional_headers = {'X-Google-Metadata-Request': 'True'}
+ request = urllib2.Request(account_uri, headers=additional_headers)
+ try:
+ response = urllib2.urlopen(request)
+ except urllib2.URLError as e:
+ raise exceptions.CommunicationError(
+ 'Could not reach metadata service: %s' % e.reason)
+ response_lines = [line.rstrip('/\n\r') for line in response.readlines()]
+ return account in response_lines
+
+ def GetInstanceScopes(self):
+ # Extra header requirement can be found here:
+ # https://developers.google.com/compute/docs/metadata
+ scopes_uri = (
+ 'http://metadata.google.internal/computeMetadata/v1/instance/'
+ 'service-accounts/%s/scopes') % self.__service_account_name
+ additional_headers = {'X-Google-Metadata-Request': 'True'}
+ request = urllib2.Request(scopes_uri, headers=additional_headers)
+ try:
+ response = urllib2.urlopen(request)
+ except urllib2.URLError as e:
+ raise exceptions.CommunicationError(
+ 'Could not reach metadata service: %s' % e.reason)
+ return util.NormalizeScopes(scope.strip() for scope in response.readlines())
+
+ def _refresh(self, do_request): # pylint: disable=g-bad-name
+ """Refresh self.access_token.
+
+ Args:
+ do_request: A function matching httplib2.Http.request's signature.
+ """
+ token_uri = (
+ 'http://metadata.google.internal/computeMetadata/v1/instance/'
+ 'service-accounts/%s/token') % self.__service_account_name
+ extra_headers = {'X-Google-Metadata-Request': 'True'}
+ request = urllib2.Request(token_uri, headers=extra_headers)
+ try:
+ content = urllib2.urlopen(request).read()
+ except urllib2.URLError as e:
+ raise exceptions.CommunicationError(
+ 'Could not reach metadata service: %s' % e.reason)
+ try:
+ credential_info = json.loads(content)
+ except ValueError:
+ raise exceptions.CredentialsError(
+ 'Invalid credentials response: uri %s' % token_uri)
+
+ self.access_token = credential_info['access_token']
+
+
+# TODO: Currently, we can't even *load*
+# `oauth2client.appengine` without being on appengine, because of how
+# it handles imports. Fix that by splitting that module into
+# GAE-specific and GAE-independent bits, and guarding imports.
+class GaeAssertionCredentials(oauth2client.client.AssertionCredentials):
+ """Assertion credentials for Google App Engine apps."""
+
+ def __init__(self, scopes, **kwds):
+ if not util.DetectGae():
+ raise exceptions.ResourceUnavailableError(
+ 'GAE credentials requested outside a GAE instance')
+ self._scopes = list(util.NormalizeScopes(scopes))
+ super(GaeAssertionCredentials, self).__init__(None, **kwds)
+
+ @classmethod
+ def Get(cls, *args, **kwds):
+ try:
+ return cls(*args, **kwds)
+ except exceptions.Error:
+ return None
+
+ @classmethod
+ def from_json(cls, json_data): # pylint: disable=g-bad-name
+ data = json.loads(json_data)
+ return GaeAssertionCredentials(data['_scopes'])
+
+ def _refresh(self, _): # pylint: disable=g-bad-name
+ """Refresh self.access_token.
+
+ Args:
+ _: (ignored) A function matching httplib2.Http.request's signature.
+ """
+ # pylint: disable=g-import-not-at-top
+ from google.appengine.api import app_identity
+ try:
+ token, _ = app_identity.get_access_token(self._scopes)
+ except app_identity.Error as e:
+ raise exceptions.CredentialsError(str(e))
+ self.access_token = token
+
+
+# TODO: Switch this from taking a path to taking a stream.
+def CredentialsFromFile(path, client_info):
+ """Read credentials from a file."""
+ credential_store = oauth2client.multistore_file.get_credential_storage(
+ path,
+ client_info['client_id'],
+ client_info['user_agent'],
+ client_info['scope'])
+ credentials = credential_store.get()
+ if credentials is None or credentials.invalid:
+ print 'Generating new OAuth credentials ...'
+ while True:
+ # If authorization fails, we want to retry, rather than let this
+ # cascade up and get caught elsewhere. If users want out of the
+ # retry loop, they can ^C.
+ try:
+ flow = oauth2client.client.OAuth2WebServerFlow(**client_info)
+ flow.redirect_uri = oauth2client.client.OOB_CALLBACK_URN
+ authorize_url = flow.step1_get_authorize_url()
+ print 'Go to the following link in your browser:'
+ print
+ print ' ' + authorize_url
+ print
+ code = raw_input('Enter verification code: ').strip()
+ credential = flow.step2_exchange(code)
+ credential_store.put(credential)
+ credential.set_store(credential_store)
+ break
+ except (oauth2client.client.FlowExchangeError, SystemExit) as e:
+ # Here SystemExit is "no credential at all", and the
+ # FlowExchangeError is "invalid" -- usually because you reused
+ # a token.
+ print 'Invalid authorization: %s' % (e,)
+ except httplib2.HttpLib2Error as e:
+ print 'Communication error: %s' % (e,)
+ raise exceptions.CredentialsError(
+ 'Communication error creating credentials: %s' % e)
+ return credentials
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/third_party/storage_apitools/encoding.py Mon
Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,315 @@
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Common code for converting proto to other formats, such as JSON."""
+
+import base64
+import collections
+import json
+
+
+from gslib.third_party.protorpc import messages
+from gslib.third_party.protorpc import protojson
+
+from gslib.third_party.storage_apitools import exceptions
+
+__all__ = [
+ 'CopyProtoMessage',
+ 'JsonToMessage',
+ 'MessageToJson',
+ 'DictToMessage',
+ 'MessageToDict',
+ 'PyValueToMessage',
+ 'MessageToPyValue',
+]
+
+
+_Codec = collections.namedtuple('_Codec', ['encoder', 'decoder'])
+CodecResult = collections.namedtuple('CodecResult', ['value', 'complete'])
+
+
+# TODO: Make these non-global.
+_UNRECOGNIZED_FIELD_MAPPINGS = {}
+_CUSTOM_MESSAGE_CODECS = {}
+_CUSTOM_FIELD_CODECS = {}
+_FIELD_TYPE_CODECS = {}
+
+
+def MapUnrecognizedFields(field_name):
+ """Register field_name as a container for unrecognized fields in
message."""
+ def Register(cls):
+ _UNRECOGNIZED_FIELD_MAPPINGS[cls] = field_name
+ return cls
+ return Register
+
+
+def RegisterCustomMessageCodec(encoder, decoder):
+ """Register a custom encoder/decoder for this message class."""
+ def Register(cls):
+ _CUSTOM_MESSAGE_CODECS[cls] = _Codec(encoder=encoder, decoder=decoder)
+ return cls
+ return Register
+
+
+def RegisterCustomFieldCodec(encoder, decoder):
+ """Register a custom encoder/decoder for this field."""
+ def Register(field):
+ _CUSTOM_FIELD_CODECS[field] = _Codec(encoder=encoder, decoder=decoder)
+ return field
+ return Register
+
+
+def RegisterFieldTypeCodec(encoder, decoder):
+ """Register a custom encoder/decoder for all fields of this type."""
+ def Register(field_type):
+ _FIELD_TYPE_CODECS[field_type] = _Codec(encoder=encoder, decoder=decoder)
+ return field_type
+ return Register
+
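
A small sketch of the registration hooks above, using MapUnrecognizedFields so
that unknown JSON keys are preserved as key/value pairs; the Metadata message
is hypothetical, and the gsutil checkout is assumed importable:

  from gslib.third_party.protorpc import messages
  from gslib.third_party.storage_apitools import encoding

  @encoding.MapUnrecognizedFields('additional_properties')
  class Metadata(messages.Message):
    """Hypothetical message whose unknown JSON keys land in a pair list."""

    class AdditionalProperty(messages.Message):
      key = messages.StringField(1)
      value = messages.StringField(2)

    additional_properties = messages.MessageField(
        AdditionalProperty, 1, repeated=True)

  decoded = encoding.JsonToMessage(Metadata, '{"color": "blue", "size": "XL"}')
  for prop in decoded.additional_properties:
    print prop.key, prop.value    # color blue / size XL (order may vary)
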
+
+# TODO: Delete this function with the switch to proto2.
+def CopyProtoMessage(message):
+ codec = protojson.ProtoJson()
+ return codec.decode_message(type(message), codec.encode_message(message))
+
+
+def MessageToJson(message, include_fields=None):
+ """Convert the given message to JSON."""
+ result = _ProtoJsonApiTools.Get().encode_message(message)
+ return _IncludeFields(result, message, include_fields)
+
+
+def JsonToMessage(message_type, message):
+ """Convert the given JSON to a message of type message_type."""
+ return _ProtoJsonApiTools.Get().decode_message(message_type, message)
+
+
+# TODO: Do this directly, instead of via JSON.
+def DictToMessage(d, message_type):
+ """Convert the given dictionary to a message of type message_type."""
+ return JsonToMessage(message_type, json.dumps(d))
+
+
+def MessageToDict(message):
+ """Convert the given message to a dictionary."""
+ return json.loads(MessageToJson(message))
+
+
+def PyValueToMessage(message_type, value):
+ """Convert the given python value to a message of type message_type."""
+ return JsonToMessage(message_type, json.dumps(value))
+
+
+def MessageToPyValue(message):
+ """Convert the given message to a python value."""
+ return json.loads(MessageToJson(message))
+
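
The conversion helpers above round-trip any protorpc message through JSON; a
minimal sketch (the Bucket message here is a made-up stand-in, not the real
storage type):

  from gslib.third_party.protorpc import messages
  from gslib.third_party.storage_apitools import encoding

  class Bucket(messages.Message):
    """Hypothetical trimmed-down message type."""
    name = messages.StringField(1)
    location = messages.StringField(2)

  bucket = Bucket(name='my-bucket', location='US')
  as_json = encoding.MessageToJson(bucket)
  print as_json                                  # e.g. {"location": "US", "name": "my-bucket"}
  print encoding.JsonToMessage(Bucket, as_json)  # back to an equal Bucket
  print encoding.MessageToDict(bucket)           # plain dict, via JSON
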
+
+def _IncludeFields(encoded_message, message, include_fields):
+ """Add the requested fields to the encoded message."""
+ if include_fields is None:
+ return encoded_message
+ result = json.loads(encoded_message)
+ for field_name in include_fields:
+ try:
+ message.field_by_name(field_name)
+ except KeyError:
+ raise exceptions.InvalidDataError(
+ 'No field named %s in message of type %s' % (
+ field_name, type(message)))
+ result[field_name] = None
+ return json.dumps(result)
+
+
+def _GetFieldCodecs(field, attr):
+ result = [
+ getattr(_CUSTOM_FIELD_CODECS.get(field), attr, None),
+ getattr(_FIELD_TYPE_CODECS.get(type(field)), attr, None),
+ ]
+ return [x for x in result if x is not None]
+
+
+class _ProtoJsonApiTools(protojson.ProtoJson):
+ """JSON encoder used by apitools clients."""
+ _INSTANCE = None
+
+ @classmethod
+ def Get(cls):
+ if cls._INSTANCE is None:
+ cls._INSTANCE = cls()
+ return cls._INSTANCE
+
+ def decode_message(self, message_type, encoded_message): # pylint: disable=invalid-name
+ if message_type in _CUSTOM_MESSAGE_CODECS:
+ return _CUSTOM_MESSAGE_CODECS[message_type].decoder(encoded_message)
+ result = super(_ProtoJsonApiTools, self).decode_message(
+ message_type, encoded_message)
+ return _DecodeUnknownFields(result, encoded_message)
+
+ def decode_field(self, field, value): # pylint: disable=g-bad-name
+ """Decode the given JSON value.
+
+ Args:
+ field: a messages.Field for the field we're decoding.
+ value: a python value we'd like to decode.
+
+ Returns:
+ A value suitable for assignment to field.
+ """
+ for decoder in _GetFieldCodecs(field, 'decoder'):
+ result = decoder(field, value)
+ value = result.value
+ if result.complete:
+ return value
+ if isinstance(field, messages.MessageField):
+ field_value = self.decode_message(field.message_type, json.dumps(value))
+ else:
+ field_value = super(_ProtoJsonApiTools, self).decode_field(field, value)
+ return field_value
+
+ def encode_message(self, message): # pylint: disable=invalid-name
+ if isinstance(message, messages.FieldList):
+ return '[%s]' % (', '.join(self.encode_message(x) for x in message))
+ if type(message) in _CUSTOM_MESSAGE_CODECS:
+ return _CUSTOM_MESSAGE_CODECS[type(message)].encoder(message)
+ message = _EncodeUnknownFields(message)
+ return super(_ProtoJsonApiTools, self).encode_message(message)
+
+ def encode_field(self, field, value): # pylint: disable=g-bad-name
+ """Encode the given value as JSON.
+
+ Args:
+ field: a messages.Field for the field we're encoding.
+ value: a value for field.
+
+ Returns:
+ A python value suitable for json.dumps.
+ """
+ for encoder in _GetFieldCodecs(field, 'encoder'):
+ result = encoder(field, value)
+ value = result.value
+ if result.complete:
+ return value
+ if isinstance(field, messages.MessageField):
+ value = json.loads(self.encode_message(value))
+ return super(_ProtoJsonApiTools, self).encode_field(field, value)
+
+
+# TODO: Fold this and _IncludeFields in as codecs.
+def _DecodeUnknownFields(message, encoded_message):
+ """Rewrite unknown fields in message into message.destination."""
+ destination = _UNRECOGNIZED_FIELD_MAPPINGS.get(type(message))
+ if destination is None:
+ return message
+ pair_field = message.field_by_name(destination)
+ if not isinstance(pair_field, messages.MessageField):
+ raise exceptions.InvalidDataFromServerError(
+ 'Unrecognized fields must be mapped to a compound '
+ 'message type.')
+ pair_type = pair_field.message_type
+ # TODO: Add more error checking around the pair
+ # type being exactly what we suspect (field names, etc).
+ if isinstance(pair_type.value, messages.MessageField):
+ new_values = _DecodeUnknownMessages(
+ message, json.loads(encoded_message), pair_type)
+ else:
+ new_values = _DecodeUnrecognizedFields(message, pair_type)
+ setattr(message, destination, new_values)
+ # We could probably get away with not setting this, but
+ # why not clear it?
+ setattr(message, '_Message__unrecognized_fields', {})
+ return message
+
+
+def _DecodeUnknownMessages(message, encoded_message, pair_type):
+ """Process unknown fields in encoded_message of a message type."""
+ field_type = pair_type.value.type
+ new_values = []
+ all_field_names = [x.name for x in message.all_fields()]
+ for name, value_dict in encoded_message.iteritems():
+ if name in all_field_names:
+ continue
+ value = PyValueToMessage(field_type, value_dict)
+ new_pair = pair_type(key=name, value=value)
+ new_values.append(new_pair)
+ return new_values
+
+
+def _DecodeUnrecognizedFields(message, pair_type):
+ """Process unrecognized fields in message."""
+ new_values = []
+ for unknown_field in message.all_unrecognized_fields():
+ # TODO: Consider validating the variant if
+ # the assignment below doesn't take care of it. It may
+ # also be necessary to check it in the case that the
+ # type has multiple encodings.
+ value, _ = message.get_unrecognized_field_info(unknown_field)
+ value_type = pair_type.field_by_name('value')
+ if isinstance(value_type, messages.MessageField):
+ decoded_value = DictToMessage(value, pair_type.value.message_type)
+ else:
+ decoded_value = value
+ new_pair = pair_type(key=str(unknown_field), value=decoded_value)
+ new_values.append(new_pair)
+ return new_values
+
+
+def _EncodeUnknownFields(message):
+ """Remap unknown fields in message out of message.source."""
+ source = _UNRECOGNIZED_FIELD_MAPPINGS.get(type(message))
+ if source is None:
+ return message
+ result = CopyProtoMessage(message)
+ pairs_field = message.field_by_name(source)
+ if not isinstance(pairs_field, messages.MessageField):
+ raise exceptions.InvalidUserInputError(
+ 'Invalid pairs field %s' % pairs_field)
+ pairs_type = pairs_field.message_type
+ value_variant = pairs_type.field_by_name('value').variant
+ pairs = getattr(message, source)
+ for pair in pairs:
+ if value_variant == messages.Variant.MESSAGE:
+ encoded_value = MessageToDict(pair.value)
+ else:
+ encoded_value = pair.value
+ result.set_unrecognized_field(pair.key, encoded_value, value_variant)
+ setattr(result, source, [])
+ return result
+
+
+def _SafeEncodeBytes(field, value):
+ """Encode the bytes in value as urlsafe base64."""
+ try:
+ if field.repeated:
+ result = [base64.urlsafe_b64encode(byte) for byte in value]
+ else:
+ result = base64.urlsafe_b64encode(value)
+ complete = True
+ except TypeError:
+ result = value
+ complete = False
+ return CodecResult(value=result, complete=complete)
+
+
+def _SafeDecodeBytes(unused_field, value):
+ """Decode the urlsafe base64 value into bytes."""
+ try:
+ result = base64.urlsafe_b64decode(str(value))
+ complete = True
+ except TypeError:
+ result = value
+ complete = False
+ return CodecResult(value=result, complete=complete)
+
+
+RegisterFieldTypeCodec(_SafeEncodeBytes, _SafeDecodeBytes)(messages.BytesField)
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/third_party/storage_apitools/exceptions.py
Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,133 @@
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Exceptions for generated client libraries."""
+
+
+class Error(Exception):
+ """Base class for all exceptions."""
+
+
+class TypecheckError(Error, TypeError):
+ """An object of an incorrect type is provided."""
+
+
+class NotFoundError(Error):
+ """A specified resource could not be found."""
+
+
+class UserError(Error):
+ """Base class for errors related to user input."""
+
+
+class InvalidDataError(Error):
+ """Base class for any invalid data error."""
+
+
+class CommunicationError(Error):
+ """Any communication error talking to an API server."""
+
+
+class HttpError(CommunicationError):
+ """Error making a request. Soon to be HttpError."""
+
+ def __init__(self, response, content, url):
+ super(HttpError, self).__init__()
+ self.response = response
+ self.content = content
+ self.url = url
+
+ def __str__(self):
+ content = self.content.decode('ascii', 'replace')
+ return 'HttpError accessing <%s>: response: <%s>, content <%s>' % (
+ self.url, self.response, content)
+
+ @property
+ def status_code(self):
+ # TODO: Turn this into something better than a
+ # KeyError if there is no status.
+ return int(self.response['status'])
+
+ @classmethod
+ def FromResponse(cls, http_response):
+ return cls(http_response.info, http_response.content,
+ http_response.request_url)
+
+
+class InvalidUserInputError(InvalidDataError):
+ """User-provided input is invalid."""
+
+
+class InvalidDataFromServerError(InvalidDataError, CommunicationError):
+ """Data received from the server is malformed."""
+
+
+class BatchError(Error):
+ """Error generated while constructing a batch request."""
+
+
+class ConfigurationError(Error):
+ """Base class for configuration errors."""
+
+
+class GeneratedClientError(Error):
+ """The generated client configuration is invalid."""
+
+
+class ConfigurationValueError(UserError):
+ """Some part of the user-specified client configuration is invalid."""
+
+
+class ResourceUnavailableError(Error):
+ """User requested an unavailable resource."""
+
+
+class CredentialsError(Error):
+ """Errors related to invalid credentials."""
+
+
+class TransferError(CommunicationError):
+ """Errors related to transfers."""
+
+
+class TransferRetryError(TransferError):
+ """Retryable errors related to transfers."""
+
+
+class TransferInvalidError(TransferError):
+ """The given transfer is invalid."""
+
+
+class RequestError(CommunicationError):
+ """The request was not successful."""
+
+
+class RetryAfterError(HttpError):
+ """The response contained a retry-after header."""
+
+ def __init__(self, response, content, url, retry_after):
+ super(RetryAfterError, self).__init__(response, content, url)
+ self.retry_after = int(retry_after)
+
+ @classmethod
+ def FromResponse(cls, http_response):
+ return cls(http_response.info, http_response.content,
+ http_response.request_url, http_response.retry_after)
+
+
+class BadStatusCodeError(HttpError):
+ """The request completed but returned a bad status code."""
+
+
+class NotYetImplementedError(GeneratedClientError):
+ """This functionality is not yet implemented."""
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/third_party/storage_apitools/extra_types.py
Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,246 @@
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Extra types understood by apitools.
+
+This file will be replaced by a .proto file when we switch to proto2
+from protorpc.
+"""
+
+import collections
+import json
+import numbers
+
+from gslib.third_party.protorpc import message_types
+from gslib.third_party.protorpc import messages
+from gslib.third_party.protorpc import protojson
+
+from gslib.third_party.storage_apitools import encoding
+from gslib.third_party.storage_apitools import exceptions
+from gslib.third_party.storage_apitools import util
+
+__all__ = [
+ 'DateTimeMessage',
+ 'JsonArray',
+ 'JsonObject',
+ 'JsonValue',
+ 'JsonProtoEncoder',
+ 'JsonProtoDecoder',
+]
+
+# We import from protorpc.
+# pylint:disable=invalid-name
+DateTimeMessage = message_types.DateTimeMessage
+# pylint:enable=invalid-name
+
+
+def _ValidateJsonValue(json_value):
+ entries = [(f, json_value.get_assigned_value(f.name))
+ for f in json_value.all_fields()]
+ assigned_entries = [(f, value) for f, value in entries if value is not None]
+ if len(assigned_entries) != 1:
+ raise exceptions.InvalidDataError('Malformed JsonValue: %s' % json_value)
+
+
+def _JsonValueToPythonValue(json_value):
+ """Convert the given JsonValue to a json string."""
+ util.Typecheck(json_value, JsonValue)
+ _ValidateJsonValue(json_value)
+ if json_value.is_null:
+ return None
+ entries = [(f, json_value.get_assigned_value(f.name))
+ for f in json_value.all_fields()]
+ assigned_entries = [(f, value) for f, value in entries if value is not None]
+ field, value = assigned_entries[0]
+ if not isinstance(field, messages.MessageField):
+ return value
+ elif field.message_type is JsonObject:
+ return _JsonObjectToPythonValue(value)
+ elif field.message_type is JsonArray:
+ return _JsonArrayToPythonValue(value)
+
+
+def _JsonObjectToPythonValue(json_value):
+ util.Typecheck(json_value, JsonObject)
+ return dict([(prop.key, _JsonValueToPythonValue(prop.value)) for prop
+ in json_value.properties])
+
+
+def _JsonArrayToPythonValue(json_value):
+ util.Typecheck(json_value, JsonArray)
+ return [_JsonValueToPythonValue(e) for e in json_value.entries]
+
+
+_MAXINT64 = (1 << 63) - 1
+_MININT64 = -(1 << 63)
+
+
+def _PythonValueToJsonValue(py_value):
+ """Convert the given python value to a JsonValue."""
+ if py_value is None:
+ return JsonValue(is_null=True)
+ if isinstance(py_value, bool):
+ return JsonValue(boolean_value=py_value)
+ if isinstance(py_value, basestring):
+ return JsonValue(string_value=py_value)
+ if isinstance(py_value, numbers.Number):
+ if isinstance(py_value, (int, long)):
+ if _MININT64 <= py_value <= _MAXINT64:
+ return JsonValue(integer_value=py_value)
+ return JsonValue(double_value=float(py_value))
+ if isinstance(py_value, dict):
+ return JsonValue(object_value=_PythonValueToJsonObject(py_value))
+ if isinstance(py_value, collections.Iterable):
+ return JsonValue(array_value=_PythonValueToJsonArray(py_value))
+ raise exceptions.InvalidDataError(
+ 'Cannot convert "%s" to JsonValue' % py_value)
+
+
+def _PythonValueToJsonObject(py_value):
+ util.Typecheck(py_value, dict)
+ return JsonObject(
+ properties=[
+ JsonObject.Property(key=key, value=_PythonValueToJsonValue(value))
+ for key, value in py_value.iteritems()])
+
+
+def _PythonValueToJsonArray(py_value):
+ return JsonArray(entries=map(_PythonValueToJsonValue, py_value))
+
+
+class JsonValue(messages.Message):
+ """Any valid JSON value."""
+ # Is this JSON object `null`?
+ is_null = messages.BooleanField(1, default=False)
+
+ # Exactly one of the following is provided if is_null is False; none
+ # should be provided if is_null is True.
+ boolean_value = messages.BooleanField(2)
+ string_value = messages.StringField(3)
+ # We keep two numeric fields to keep int64 round-trips exact.
+ double_value = messages.FloatField(4, variant=messages.Variant.DOUBLE)
+ integer_value = messages.IntegerField(5, variant=messages.Variant.INT64)
+ # Compound types
+ object_value = messages.MessageField('JsonObject', 6)
+ array_value = messages.MessageField('JsonArray', 7)
+
+
+class JsonObject(messages.Message):
+ """A JSON object value.
+
+ Messages:
+ Property: A property of a JsonObject.
+
+ Fields:
+ properties: A list of properties of a JsonObject.
+ """
+
+ class Property(messages.Message):
+ """A property of a JSON object.
+
+ Fields:
+ key: Name of the property.
+ value: A JsonValue attribute.
+ """
+ key = messages.StringField(1)
+ value = messages.MessageField(JsonValue, 2)
+
+ properties = messages.MessageField(Property, 1, repeated=True)
+
+
+class JsonArray(messages.Message):
+ """A JSON array value."""
+ entries = messages.MessageField(JsonValue, 1, repeated=True)
+
+
+_JSON_PROTO_TO_PYTHON_MAP = {
+ JsonArray: _JsonArrayToPythonValue,
+ JsonObject: _JsonObjectToPythonValue,
+ JsonValue: _JsonValueToPythonValue,
+}
+_JSON_PROTO_TYPES = tuple(_JSON_PROTO_TO_PYTHON_MAP.keys())
+
+
+def _JsonProtoToPythonValue(json_proto):
+ util.Typecheck(json_proto, _JSON_PROTO_TYPES)
+ return _JSON_PROTO_TO_PYTHON_MAP[type(json_proto)](json_proto)
+
+
+def _PythonValueToJsonProto(py_value):
+ if isinstance(py_value, dict):
+ return _PythonValueToJsonObject(py_value)
+ if (isinstance(py_value, collections.Iterable) and
+ not isinstance(py_value, basestring)):
+ return _PythonValueToJsonArray(py_value)
+ return _PythonValueToJsonValue(py_value)
+
+
+def _JsonProtoToJson(json_proto, unused_encoder=None):
+ return json.dumps(_JsonProtoToPythonValue(json_proto))
+
+
+def _JsonToJsonProto(json_data, unused_decoder=None):
+ return _PythonValueToJsonProto(json.loads(json_data))
+
+
+# pylint:disable=invalid-name
+JsonProtoEncoder = _JsonProtoToJson
+JsonProtoDecoder = _JsonToJsonProto
+# pylint:enable=invalid-name
+encoding.RegisterCustomMessageCodec(
+ encoder=JsonProtoEncoder, decoder=JsonProtoDecoder)(JsonValue)
+encoding.RegisterCustomMessageCodec(
+ encoder=JsonProtoEncoder, decoder=JsonProtoDecoder)(JsonObject)
+encoding.RegisterCustomMessageCodec(
+ encoder=JsonProtoEncoder, decoder=JsonProtoDecoder)(JsonArray)
+
+
+def _EncodeDateTimeField(field, value):
+ result = protojson.ProtoJson().encode_field(field, value)
+ return encoding.CodecResult(value=result, complete=True)
+
+
+def _DecodeDateTimeField(unused_field, value):
+ result = protojson.ProtoJson().decode_field(
+ message_types.DateTimeField(1), value)
+ return encoding.CodecResult(value=result, complete=True)
+
+
+encoding.RegisterFieldTypeCodec(_EncodeDateTimeField, _DecodeDateTimeField)(
+ message_types.DateTimeField)
+
+
+# Handle the int64<-->string conversion apiary requires
+def _EncodeInt64Field(field, value):
+ """Handle the special case of int64 as a string."""
+ capabilities = [
+ messages.Variant.INT64,
+ messages.Variant.UINT64,
+ ]
+ if field.variant not in capabilities:
+ return encoding.CodecResult(value=value, complete=False)
+
+ if field.repeated:
+ result = [str(x) for x in value]
+ else:
+ result = str(value)
+ return encoding.CodecResult(value=result, complete=True)
+
+
+def _DecodeInt64Field(unused_field, value):
+ # Don't need to do anything special, they're decoded just fine
+ return encoding.CodecResult(value=value, complete=False)
+
+encoding.RegisterFieldTypeCodec(_EncodeInt64Field, _DecodeInt64Field)(
+ messages.IntegerField)
+
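
To make the codec registration above concrete, a minimal round-trip sketch (not part of the commit; Python 2, assuming the gsutil tree is importable):

    from gslib.third_party.storage_apitools import extra_types

    # JsonProtoDecoder parses a JSON string into the JsonValue/JsonObject/
    # JsonArray messages defined above; JsonProtoEncoder reverses the trip.
    proto = extra_types.JsonProtoDecoder('{"name": "bucket-1", "versioned": true}')
    assert isinstance(proto, extra_types.JsonObject)
    json_text = extra_types.JsonProtoEncoder(proto)

Integers outside the signed 64-bit range fall back to double_value, per _PythonValueToJsonValue above.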
=======================================
--- /dev/null
+++ /third_party/gsutil/gslib/third_party/storage_apitools/http_wrapper.py
Mon Nov 3 12:38:28 2014 UTC
@@ -0,0 +1,337 @@
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""HTTP wrapper for apitools.
+
+This library wraps the underlying http library we use, which is
+currently httplib2.
+"""
+
+import collections
+import contextlib
+import httplib
+import logging
+import socket
+import time
+import urlparse
+
+import httplib2
+
+from gslib.third_party.storage_apitools import exceptions
+from gslib.third_party.storage_apitools import util
+
+__all__ = [
+ 'GetHttp',
+ 'MakeRequest',
+]
+
+
+# 308 and 429 don't have names in httplib.
+RESUME_INCOMPLETE = 308
+TOO_MANY_REQUESTS = 429
+_REDIRECT_STATUS_CODES = (
+ httplib.MOVED_PERMANENTLY,
+ httplib.FOUND,
+ httplib.SEE_OTHER,
+ httplib.TEMPORARY_REDIRECT,
+ RESUME_INCOMPLETE,
+)
+
+# http: An httplib2.Http instance.
+# http_request: A http_wrapper.Request.
+# exc: Exception being raised.
+# num_retries: Number of retries consumed; used for exponential backoff.
+ExceptionRetryArgs = collections.namedtuple('ExceptionRetryArgs',
+ ['http', 'http_request', 'exc',
+ 'num_retries'])
+
+
+@contextlib.contextmanager
+def _Httplib2Debuglevel(http_request, level, http=None):
+ """Temporarily change the value of httplib2.debuglevel if needed.
+
+ If http_request has a `loggable_body` distinct from `body`, then we
+ need to prevent httplib2 from logging the full body. This sets
+ httplib2.debuglevel for the duration of the `with` block; however,
+ that alone won't change the value of existing HTTP connections. If
+ an httplib2.Http object is provided, we'll also change the level on
+ any cached connections attached to it.
+ """
+ if http_request.loggable_body is None:
+ yield
+ return
+ old_level = httplib2.debuglevel
+ http_levels = {}
+ httplib2.debuglevel = level
+ if http is not None:
+ for connection_key, connection in http.connections.iteritems():
+ # httplib2 stores two kinds of values in this dict, connection
+ # classes and instances. Since the connection types are all
+ # old-style classes, we can't easily distinguish by connection
+ # type -- so instead we use the key pattern.
+ if ':' not in connection_key:
+ continue
+ http_levels[connection_key] = connection.debuglevel
+ connection.set_debuglevel(level)
+ yield
+ httplib2.debuglevel = old_level
+ if http is not None:
+ for connection_key, old_level in http_levels.iteritems():
+ if connection_key in http.connections:
+ http.connections[connection_key].set_debuglevel(old_level)
+
+
+class Request(object):
+ """Class encapsulating the data for an HTTP request."""
+
+ def __init__(self, url='', http_method='GET', headers=None, body=''):
+ self.url = url
+ self.http_method = http_method
+ self.headers = headers or {}
+ self.__body = None
+ self.__loggable_body = None
+ self.body = body
+
+ @property
+ def loggable_body(self):
+ return self.__loggable_body
+
+ @loggable_body.setter
+ def loggable_body(self, value):
+ if self.body is None:
+ raise exceptions.RequestError(
+ 'Cannot set loggable body on request with no body')
+ self.__loggable_body = value
+
+ @property
+ def body(self):
+ return self.__body
+
+ @body.setter
+ def body(self, value):
+ self.__body = value
+ if value is not None:
+ self.headers['content-length'] = str(len(self.__body))
+ else:
+ self.headers.pop('content-length', None)
+ # This line ensures we don't try to print large requests.
+ if not isinstance(value, basestring):
+ self.loggable_body = '<media body>'
+
+
+# Note: currently the order of fields here is important, since we want
+# to be able to pass in the result from httplib2.request.
+class Response(collections.namedtuple(
+ 'HttpResponse', ['info', 'content', 'request_url'])):
+ """Class encapsulating data for an HTTP response."""
+ __slots__ = ()
+
+ def __len__(self):
+ return self.length
+
+ @property
+ def length(self):
+ """Return the length of this response.
+
+ We expose this as an attribute since using len() directly can fail
+ for responses larger than sys.maxint.
+
+ Returns:
+ Response length (as int or long)
+ """
+ def ProcessContentRange(content_range):
+ _, _, range_spec = content_range.partition(' ')
+ byte_range, _, _ = range_spec.partition('/')
+ start, _, end = byte_range.partition('-')
+ return int(end) - int(start) + 1
+
+ if '-content-encoding' in self.info and 'content-range' in self.info:
+ # httplib2 rewrites content-length in the case of a compressed
+ # transfer; we can't trust the content-length header in that
+ # case, but we *can* trust content-range, if it's present.
+ return ProcessContentRange(self.info['content-range'])
+ elif 'content-length' in self.info:
+ return int(self.info.get('content-length'))
+ elif 'content-range' in self.info:
+ return ProcessContentRange(self.info['content-range'])
+ return len(self.content)
+
+ @property
+ def status_code(self):
+ return int(self.info['status'])
+
+ @property
+ def retry_after(self):
+ if 'retry-after' in self.info:
+ return int(self.info['retry-after'])
+
+ @property
+ def is_redirect(self):
+ return (self.status_code in _REDIRECT_STATUS_CODES and
+ 'location' in self.info)
+
+
+def CheckResponse(response):
+ if response is None:
+ # Caller shouldn't call us if the response is None, but handle anyway.
+ raise exceptions.RequestError('Request to url %s did not return a response.'
+ % response.request_url)
+ elif (response.status_code >= 500 or
+ response.status_code == TOO_MANY_REQUESTS):
+ raise exceptions.BadStatusCodeError.FromResponse(response)
+ elif response.status_code == httplib.UNAUTHORIZED:
+ # Sometimes we get a 401 after a connection break.
+ # TODO: this shouldn't be a retryable exception, but for now we retry.
+ raise exceptions.BadStatusCodeError.FromResponse(response)
+ elif response.retry_after:
+ raise exceptions.RetryAfterError.FromResponse(response)
+
+
+def RebuildHttpConnections(http):
+ """Rebuilds all http connections in the httplib2.Http instance.
+
+ httplib2 overloads the map in http.connections to contain two different
+ types of values:
+ { scheme string: connection class } and
+ { scheme + authority string : actual http connection }
+ Here we remove all of the entries for actual connections so that on the
+ next request httplib2 will rebuild them from the connection types.
+
+ Args:
+ http: An httplib2.Http instance.
+ """
+ if getattr(http, 'connections', None):
+ for conn_key in http.connections.keys():
+ if ':' in conn_key:
+ del http.connections[conn_key]
+
+
+def RethrowExceptionHandler(*unused_args):
+ raise
+
+
+def HandleExceptionsAndRebuildHttpConnections(retry_args):
+ """Exception handler for http failures.
+
+ This catches known failures and rebuilds the underlying HTTP connections.
+
+ Args:
+ retry_args: An ExceptionRetryArgs tuple.
+ """
+ retry_after = None
+ if isinstance(retry_args.exc, httplib.BadStatusLine):
+ logging.error('Caught BadStatusLine from httplib, retrying: %s',
+ retry_args.exc)
+ elif isinstance(retry_args.exc, socket.error):
+ logging.error('Caught socket error, retrying: %s', retry_args.exc)
+ elif isinstance(retry_args.exc, exceptions.BadStatusCodeError):
+ logging.error('Response returned status %s, retrying',
+ retry_args.exc.status_code)
+ elif isinstance(retry_args.exc, exceptions.RetryAfterError):
+ logging.error('Response returned a retry-after header, retrying')
+ retry_after = retry_args.exc.retry_after
+ elif isinstance(retry_args.exc, ValueError):
+ # oauth2_client tries to JSON-decode the response, which can result
+ # in a ValueError if the response was invalid. Until that is fixed in
+ # oauth2_client, need to handle it here.
+ logging.error('Response content was invalid (%s), retrying',
+ retry_args.exc)
+ elif isinstance(retry_args.exc, exceptions.RequestError):
+ logging.error('Request returned no response, retrying')
+ else:
+ raise
+ RebuildHttpConnections(retry_args.http)
+ logging.error('Retrying request to url %s after exception %s',
+ retry_args.http_request.url, retry_args.exc)
+ time.sleep(retry_after or util.CalculateWaitForRetry(retry_args.num_retries))
+
+
+def MakeRequest(http, http_request, retries=7, redirections=5,
+ retry_func=HandleExceptionsAndRebuildHttpConnections,
+ check_response_func=CheckResponse):
+ """Send http_request via the given http, performing error/retry handling.
+
+ Args:
+ http: An httplib2.Http instance, or a http multiplexer that delegates to
+ an underlying http, for example, HTTPMultiplexer.
+ http_request: A Request to send.
+ retries: (int, default 7) Number of retries to attempt on 5XX replies.
+ redirections: (int, default 5) Number of redirects to follow.
+ retry_func: Function to handle retries on exceptions. Arguments are
+ (Httplib2.Http, Request, Exception, int num_retries).
+ check_response_func: Function to validate the HTTP response. Arguments are
+ (Response, response content, url).
+
+ Returns:
+ A Response object.
+ """
+ retry = 0
+ while True:
+ try:
+ return _MakeRequestNoRetry(http, http_request, redirections=redirections,
+ check_response_func=check_response_func)
+ # retry_func will consume the exception types it handles and raise.
+ # pylint: disable=broad-except
+ except Exception as e:
+ retry += 1
+ if retry >= retries:
+ raise
+ else:
+ retry_func(ExceptionRetryArgs(http, http_request, e, retry))
+
+
+def _MakeRequestNoRetry(http, http_request, redirections=5,
+ check_response_func=CheckResponse):
+ """Send http_request via the given http.
+
+ This wrapper exists to handle translation between the plain httplib2
+ request/response types and the Request and Response types above.
+
+ Args:
+ http: An httplib2.Http instance, or a http multiplexer that delegates to
+ an underlying http, for example, HTTPMultiplexer.
+ http_request: A Request to send.
+ redirections: (int, default 5) Number of redirects to follow.
+ check_response_func: Function to validate the HTTP response. Arguments are
+ (Response, response content, url).
+
+ Returns:
+ Response object.
+
+ Raises:
+ RequestError if no response could be parsed.
+ """
+ connection_type = None
+ if getattr(http, 'connections', None):
+ url_scheme = urlparse.urlsplit(http_request.url).scheme
+ if url_scheme and url_scheme in http.connections:
+ connection_type = http.connections[url_scheme]
+
+ # Custom printing only at debuglevel 4
+ new_debuglevel = 4 if httplib2.debuglevel == 4 else 0
+ with _Httplib2Debuglevel(http_request, new_debuglevel, http=http):
+ info, content = http.request(
+ str(http_request.url), method=str(http_request.http_method),
+ body=http_request.body, headers=http_request.headers,
+ redirections=redirections, connection_type=connection_type)
+
+ if info is None:
+ raise exceptions.RequestError()
+
+ response = Response(info, content, http_request.url)
+ check_response_func(response)
+ return response
+
+
+def GetHttp():
+ return httplib2.Http()
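
Tying the pieces together, a minimal usage sketch for this wrapper (not part of the commit; the URL is a placeholder and the snippet assumes the gsutil tree is importable):

    from gslib.third_party.storage_apitools import http_wrapper

    http = http_wrapper.GetHttp()
    request = http_wrapper.Request(
        url='https://www.googleapis.com/storage/v1/b',  # placeholder URL
        http_method='GET')
    # CheckResponse raises on 5xx/429/retry-after responses; the default
    # retry_func sleeps, rebuilds httplib2 connections, and MakeRequest
    # re-raises once the retry budget is exhausted.
    response = http_wrapper.MakeRequest(http, request, retries=3)
    print response.status_code, response.length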
=======================================
***Additional files exist in this changeset.***