# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
from hashlib import sha256
import itertools

from boto.compat import StringIO
from tests.unit import unittest
from mock import (
    call,
    Mock,
    sentinel,
)
from nose.tools import assert_equal

from boto.glacier.layer1 import Layer1
from boto.glacier.vault import Vault
from boto.glacier.writer import Writer, resume_file_upload
from boto.glacier.utils import bytes_to_hex, chunk_hashes, tree_hash


def create_mock_vault():
    vault = Mock(spec=Vault)
    vault.layer1 = Mock(spec=Layer1)
    vault.layer1.complete_multipart_upload.return_value = dict(
        ArchiveId=sentinel.archive_id)
    vault.name = sentinel.vault_name
    return vault


def partify(data, part_size):
    for i in itertools.count(0):
        start = i * part_size
        part = data[start:start + part_size]
        if part:
            yield part
        else:
            return


def calculate_mock_vault_calls(data, part_size, chunk_size):
    upload_part_calls = []
    data_tree_hashes = []
    for i, data_part in enumerate(partify(data, part_size)):
        start = i * part_size
        end = start + len(data_part)
        data_part_tree_hash_blob = tree_hash(
            chunk_hashes(data_part, chunk_size))
        data_part_tree_hash = bytes_to_hex(data_part_tree_hash_blob)
        data_part_linear_hash = sha256(data_part).hexdigest()
        upload_part_calls.append(
            call.layer1.upload_part(
                sentinel.vault_name, sentinel.upload_id,
                data_part_linear_hash, data_part_tree_hash,
                (start, end - 1), data_part))
        data_tree_hashes.append(data_part_tree_hash_blob)
    return upload_part_calls, data_tree_hashes


def check_mock_vault_calls(vault, upload_part_calls, data_tree_hashes,
                           data_len):
    vault.layer1.upload_part.assert_has_calls(
        upload_part_calls, any_order=True)
    assert_equal(
        len(upload_part_calls), vault.layer1.upload_part.call_count)
    data_tree_hash = bytes_to_hex(tree_hash(data_tree_hashes))
    vault.layer1.complete_multipart_upload.assert_called_once_with(
        sentinel.vault_name, sentinel.upload_id, data_tree_hash, data_len)
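

# For illustration only: a by-hand check of how the Glacier tree hash used
# by the helpers above is composed. With a 4-byte part hashed in 2-byte
# chunks, the part's tree hash should be the SHA-256 of the two concatenated
# chunk digests. This is a minimal sketch and is not used by the tests below.
def _example_tree_hash_by_hand():
    chunk_a = sha256(b'12').digest()
    chunk_b = sha256(b'34').digest()
    expected = sha256(chunk_a + chunk_b).digest()
    assert tree_hash(chunk_hashes(b'1234', 2)) == expected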


class TestWriter(unittest.TestCase):
    def setUp(self):
        super(TestWriter, self).setUp()
        self.vault = create_mock_vault()
        self.chunk_size = 2  # power of 2
        self.part_size = 4  # power of 2
        upload_id = sentinel.upload_id
        self.writer = Writer(
            self.vault, upload_id, self.part_size, self.chunk_size)

    def check_write(self, write_list):
        for write_data in write_list:
            self.writer.write(write_data)
        self.writer.close()
        data = b''.join(write_list)
        upload_part_calls, data_tree_hashes = calculate_mock_vault_calls(
            data, self.part_size, self.chunk_size)
        check_mock_vault_calls(
            self.vault, upload_part_calls, data_tree_hashes, len(data))

    def test_single_byte_write(self):
        self.check_write([b'1'])

    def test_one_part_write(self):
        self.check_write([b'1234'])

    def test_split_write_1(self):
        self.check_write([b'1', b'234'])

    def test_split_write_2(self):
        self.check_write([b'12', b'34'])

    def test_split_write_3(self):
        self.check_write([b'123', b'4'])

    def test_one_part_plus_one_write(self):
        self.check_write([b'12345'])

    def test_returns_archive_id(self):
        self.writer.write(b'1')
        self.writer.close()
        self.assertEqual(sentinel.archive_id, self.writer.get_archive_id())

    def test_current_tree_hash(self):
        self.writer.write(b'1234')
        self.writer.write(b'567')
        hash_1 = self.writer.current_tree_hash
        self.assertEqual(hash_1,
            b'\x0e\xb0\x11Z\x1d\x1f\n\x10|\xf76\xa6\xf5'
            b'\x83\xd1\xd5"bU\x0c\x95\xa8<\xf5\x81\xef\x0e\x0f\x95\n\xb7k')

        # This hash will be different, since the content has changed.
        self.writer.write(b'22i3uy')
        hash_2 = self.writer.current_tree_hash
        self.assertEqual(hash_2,
            b'\x7f\xf4\x97\x82U]\x81R\x05#^\xe8\x1c\xd19'
            b'\xe8\x1f\x9e\xe0\x1aO\xaad\xe5\x06"\xa5\xc0\xa8AdL')
        self.writer.close()

        # Check the final tree hash, post-close.
        final_hash = self.writer.current_tree_hash
        self.assertEqual(final_hash,
            b';\x1a\xb8!=\xf0\x14#\x83\x11\xd5\x0b\x0f'
            b'\xc7D\xe4\x8e\xd1W\x99z\x14\x06\xb9D\xd0\xf0*\x93\xa2\x8e\xf9')
        # Then assert we don't get a different one on a subsequent call.
        self.assertEqual(final_hash, self.writer.current_tree_hash)

    def test_current_uploaded_size(self):
        self.writer.write(b'1234')
        self.writer.write(b'567')
        # Only complete parts count before close: 7 bytes written means one
        # full 4-byte part has been uploaded and 3 bytes are still buffered.
        size_1 = self.writer.current_uploaded_size
        self.assertEqual(size_1, 4)

        # This size will be different, since more content has been written.
        self.writer.write(b'22i3uy')
        size_2 = self.writer.current_uploaded_size
        self.assertEqual(size_2, 12)
        self.writer.close()

        # Get the final size, post-close; close() flushes the buffered byte.
        final_size = self.writer.current_uploaded_size
        self.assertEqual(final_size, 13)
        # Then assert we don't get a different one on a subsequent call.
        self.assertEqual(final_size, self.writer.current_uploaded_size)

    def test_upload_id(self):
        self.assertEqual(sentinel.upload_id, self.writer.upload_id)
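

# For context: resume_file_upload, exercised by TestResume below, takes a
# part_hash_map of {part_index: part_tree_hash} describing parts that were
# already uploaded; those parts are skipped and only the rest are sent. A
# minimal sketch of building such a map from raw data, assuming the same
# part/chunk sizes the tests use (not used by the tests themselves):
def _example_part_hash_map(data, part_size=4, chunk_size=2):
    return dict(
        (i, tree_hash(chunk_hashes(part, chunk_size)))
        for i, part in enumerate(partify(data, part_size)))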


class TestResume(unittest.TestCase):
    def setUp(self):
        super(TestResume, self).setUp()
        self.vault = create_mock_vault()
        self.chunk_size = 2  # power of 2
        self.part_size = 4  # power of 2

    def check_no_resume(self, data, resume_set=frozenset()):
        fobj = StringIO(data.decode('utf-8'))
        part_hash_map = {}
        for part_index in resume_set:
            start = self.part_size * part_index
            end = start + self.part_size
            part_data = data[start:end]
            part_hash_map[part_index] = tree_hash(
                chunk_hashes(part_data, self.chunk_size))

        resume_file_upload(
            self.vault, sentinel.upload_id, self.part_size, fobj,
            part_hash_map, self.chunk_size)

        upload_part_calls, data_tree_hashes = calculate_mock_vault_calls(
            data, self.part_size, self.chunk_size)
        # Parts already present in resume_set must not be re-uploaded.
        resume_upload_part_calls = [
            upload_call
            for part_index, upload_call in enumerate(upload_part_calls)
            if part_index not in resume_set]
        check_mock_vault_calls(
            self.vault, resume_upload_part_calls, data_tree_hashes,
            len(data))

    def test_one_part_no_resume(self):
        self.check_no_resume(b'1234')

    def test_two_parts_no_resume(self):
        self.check_no_resume(b'12345678')

    def test_one_part_resume(self):
        self.check_no_resume(b'1234', resume_set=set([0]))

    def test_two_parts_one_resume(self):
        self.check_no_resume(b'12345678', resume_set=set([1]))

    def test_returns_archive_id(self):
        archive_id = resume_file_upload(
            self.vault, sentinel.upload_id, self.part_size, StringIO('1'),
            {}, self.chunk_size)
        self.assertEqual(sentinel.archive_id, archive_id)
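

# Allow running this module directly (e.g. "python test_writer.py"), in
# addition to running it through the project's usual test runner.
if __name__ == '__main__':
    unittest.main()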