# -*- coding: utf-8 -*-
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of rewrite command (in-place cloud object transformation)."""
from __future__ import absolute_import
from apitools.base.py import encoding
from gslib.cloud_api import EncryptionException
from gslib.command import Command
from gslib.command_argument import CommandArgument
from gslib.cs_api_map import ApiSelector
from gslib.encryption_helper import CryptoTupleFromKey
from gslib.encryption_helper import FindMatchingCryptoKey
from gslib.encryption_helper import GetEncryptionTupleAndSha256Hash
from gslib.exception import CommandException
from gslib.name_expansion import NameExpansionIterator
from gslib.progress_callback import ConstructAnnounceText
from gslib.progress_callback import FileProgressCallbackHandler
from gslib.storage_url import StorageUrlFromString
from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
from gslib.translation_helper import PreconditionsFromHeaders
from gslib.util import ConvertRecursiveToFlatWildcard
from gslib.util import GetCloudApiInstance
from gslib.util import NO_MAX
from gslib.util import StdinIterator
_SYNOPSIS = """
gsutil rewrite -k [-f] [-r] url...
gsutil rewrite -k [-f] [-r] -I
"""
_DETAILED_HELP_TEXT = ("""
SYNOPSIS
""" + _SYNOPSIS + """
DESCRIPTION
The gsutil rewrite command performs in-place transformations on cloud objects.
The transformation(s) are atomic and applied based on the input
transformation flags. Currently, only the "-k" flag is supported to add,
rotate, or remove encryption keys on objects.
For example, the command:
gsutil rewrite -k gs://bucket/**
will update all objects in gs://bucket with the current encryption key
from your boto config file.
You can also use the -r option to specify recursive object transform; this is
synonymous with the ** wildcard. Thus, either of the following two commands
will perform encryption key transforms on gs://bucket/subdir and all objects
and subdirectories under it:
gsutil rewrite -k gs://bucket/subdir**
gsutil rewrite -k -r gs://bucket/subdir
The rewrite command acts only on live object versions, so specifying a
URL with a generation will fail. If you want to rewrite an archived
generation, first copy it to the live version, then rewrite it, for example:
gsutil cp gs://bucket/object#123 gs://bucket/object
gsutil rewrite -k gs://bucket/object
The rewrite command will skip objects that are already in the desired state.
For example, if you run:
gsutil rewrite -k gs://bucket/**
and gs://bucket contains objects that already match the encryption
configuration, gsutil will skip rewriting those objects and only rewrite
objects that do not match the encryption configuration.
You can pass a list of URLs (one per line) to rewrite on stdin instead of as
command line arguments by using the -I option. This allows you to use gsutil
in a pipeline to rewrite objects identified by a program, such as:
some_program | gsutil -m rewrite -k -I
The contents of stdin can name cloud URLs and wildcards of cloud URLs.
The rewrite command requires OWNER permissions on each object to preserve
object ACLs. You can bypass this by using the -O flag, which will cause
gsutil not to read the object's ACL and instead apply the default object ACL
to the rewritten object:
gsutil rewrite -k -O gs://bucket/**
OPTIONS
-f Continues silently (without printing error messages) despite
errors when rewriting multiple objects. If some of the objects
could not be rewritten, gsutil's exit status will be non-zero
even if this flag is set. This option is implicitly set when
running "gsutil -m rewrite ...".
-I Causes gsutil to read the list of objects to rewrite from stdin.
This allows you to run a program that generates the list of
objects to rewrite.
-k Rewrite the objects to the current encryption key specific in
your boto configuration file. If encryption_key is specified,
encrypt all objects with this key. If encryption_key is
unspecified, decrypt all objects. See `gsutil help encryption`
for details on encryption configuration.
-O Rewrite objects with the bucket's default object ACL instead of
the existing object ACL. This is needed if you do not have
OWNER permission on the object.
-R, -r The -R and -r options are synonymous. Causes bucket or bucket
subdirectory contents to be rewritten recursively.
""")
def _RewriteExceptionHandler(cls, e):
"""Simple exception handler to allow post-completion status."""
if not cls.continue_on_error:
cls.logger.error(str(e))
cls.op_failure_count += 1
def _RewriteFuncWrapper(cls, name_expansion_result, thread_state=None):
cls.RewriteFunc(name_expansion_result, thread_state=thread_state)
def GenerationCheckGenerator(url_strs):
"""Generator function that ensures generation-less (live) arguments."""
for url_str in url_strs:
if StorageUrlFromString(url_str).generation is not None:
raise CommandException(
'"rewrite" called on URL with generation (%s).' % url_str)
yield url_str
class _TransformTypes(object):
"""Enum class for valid transforms."""
CRYPTO_KEY = 'crypto_key'
class RewriteCommand(Command):
"""Implementation of gsutil rewrite command."""
# Command specification. See base class for documentation.
command_spec = Command.CreateCommandSpec(
'rewrite',
command_name_aliases=[],
usage_synopsis=_SYNOPSIS,
min_args=0,
max_args=NO_MAX,
supported_sub_args='fkIrRO',
file_url_ok=False,
provider_url_ok=False,
urls_start_arg=0,
gs_api_support=[ApiSelector.JSON],
gs_default_api=ApiSelector.JSON,
argparse_arguments=[
CommandArgument.MakeZeroOrMoreCloudURLsArgument()
]
)
# Help specification. See help_provider.py for documentation.
help_spec = Command.HelpSpec(
help_name='rewrite',
help_name_aliases=['rekey', 'rotate'],
help_type='command_help',
help_one_line_summary='Rewrite objects',
help_text=_DETAILED_HELP_TEXT,
subcommand_help_text={},
)
def CheckProvider(self, url):
if url.scheme != 'gs':
raise CommandException(
'"rewrite" called on URL with unsupported provider (%s).' % str(url))
def RunCommand(self):
"""Command entry point for the rewrite command."""
self.continue_on_error = self.parallel_operations
self.read_args_from_stdin = False
self.no_preserve_acl = False
self.supported_transformation_flags = ['-k']
self.transform_types = []
self.op_failure_count = 0
self.current_encryption_tuple, self.current_encryption_sha256 = (
GetEncryptionTupleAndSha256Hash())
if self.sub_opts:
for o, unused_a in self.sub_opts:
if o == '-f':
self.continue_on_error = True
elif o == '-k':
self.transform_types.append(_TransformTypes.CRYPTO_KEY)
elif o == '-I':
self.read_args_from_stdin = True
elif o == '-O':
self.no_preserve_acl = True
elif o == '-r' or o == '-R':
self.recursion_requested = True
self.all_versions = True
if self.read_args_from_stdin:
if self.args:
raise CommandException('No arguments allowed with the -I flag.')
url_strs = StdinIterator()
else:
if not self.args:
raise CommandException('The rewrite command (without -I) expects at '
'least one URL.')
url_strs = self.args
url_strs = GenerationCheckGenerator(url_strs)
if not self.transform_types:
raise CommandException(
'rewrite command requires at least one transformation flag. '
'Currently supported transformation flags: %s' %
self.supported_transformation_flags)
self.preconditions = PreconditionsFromHeaders(self.headers or {})
# Convert recursive flag to flat wildcard to avoid performing multiple
# listings.
if self.recursion_requested:
url_strs = ConvertRecursiveToFlatWildcard(url_strs)
# Expand the source argument(s).
name_expansion_iterator = NameExpansionIterator(
self.command_name, self.debug, self.logger, self.gsutil_api,
url_strs, self.recursion_requested, project_id=self.project_id,
continue_on_error=self.continue_on_error or self.parallel_operations)
# Perform rewrite requests in parallel (-m) mode, if requested.
self.Apply(_RewriteFuncWrapper, name_expansion_iterator,
_RewriteExceptionHandler,
fail_on_error=(not self.continue_on_error),
shared_attrs=['op_failure_count'])
if self.op_failure_count:
plural_str = 's' if self.op_failure_count else ''
raise CommandException('%d file%s/object%s could not be rewritten.' % (
self.op_failure_count, plural_str, plural_str))
return 0
def RewriteFunc(self, name_expansion_result, thread_state=None):
gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
self.CheckProvider(name_expansion_result.expanded_storage_url)
# If other transform types are added here, they must ensure that the
# encryption key configuration matches the boto configuration, because
# gsutil maintains an invariant that all objects it writes use the
# encryption_key value (including decrypting if no key is present).
if _TransformTypes.CRYPTO_KEY in self.transform_types:
self.CryptoRewrite(name_expansion_result.expanded_storage_url, gsutil_api)
def CryptoRewrite(self, transform_url, gsutil_api):
"""Make the cloud object at transform_url match encryption configuration.
Args:
transform_url: CloudUrl to rewrite.
gsutil_api: gsutil CloudApi instance for making API calls.
"""
# Get all fields so that we can ensure that the target metadata is
# specified correctly.
src_metadata = gsutil_api.GetObjectMetadata(
transform_url.bucket_name, transform_url.object_name,
generation=transform_url.generation, provider=transform_url.scheme)
if self.no_preserve_acl:
# Leave ACL unchanged.
src_metadata.acl = []
elif not src_metadata.acl:
raise CommandException(
'No OWNER permission found for object %s. OWNER permission is '
'required for rewriting objects, (otherwise their ACLs would be '
'reset).' % transform_url)
src_encryption_sha256 = None
if (src_metadata.customerEncryption and
src_metadata.customerEncryption.keySha256):
src_encryption_sha256 = src_metadata.customerEncryption.keySha256
if src_encryption_sha256 == self.current_encryption_sha256:
if self.current_encryption_sha256 is not None:
self.logger.info('Skipping %s, already has current encryption key' %
transform_url)
else:
self.logger.info('Skipping %s, already decrypted' % transform_url)
else:
# Make a deep copy of the source metadata
dst_metadata = encoding.PyValueToMessage(
apitools_messages.Object, encoding.MessageToPyValue(src_metadata))
# Remove some unnecessary/invalid fields.
dst_metadata.customerEncryption = None
dst_metadata.generation = None
# Service has problems if we supply an ID, but it is responsible for
# generating one, so it is not necessary to include it here.
dst_metadata.id = None
decryption_tuple = None
if src_encryption_sha256 is None:
announce_text = 'Encrypting'
else:
decryption_key = FindMatchingCryptoKey(src_encryption_sha256)
if not decryption_key:
raise EncryptionException(
'Missing decryption key with SHA256 hash %s. No decryption key '
'matches object %s' % (src_encryption_sha256, transform_url))
decryption_tuple = CryptoTupleFromKey(decryption_key)
if self.current_encryption_sha256 is None:
announce_text = 'Decrypting'
else:
announce_text = 'Rotating'
progress_callback = FileProgressCallbackHandler(
ConstructAnnounceText(announce_text, transform_url.url_string),
self.logger).call
gsutil_api.CopyObject(
src_metadata, dst_metadata, src_generation=transform_url.generation,
preconditions=self.preconditions, progress_callback=progress_callback,
decryption_tuple=decryption_tuple,
encryption_tuple=self.current_encryption_tuple,
provider=transform_url.scheme, fields=[])