Optimize repack command.

This commit is contained in:
Bastian Kleineidam 2013-02-21 17:51:34 +01:00
parent e879d211fd
commit 69ce2740ca
4 changed files with 65 additions and 12 deletions

View File

@ -1,6 +1,8 @@
0.19 "" (released xx.xx.2013)
* Support the lzma module in Python >= 3.3
* Optimize repacking of archives with the same format.
Closes: GH bug #1
0.18 "Skyfall" (released 15.12.2012)

View File

@ -580,13 +580,33 @@ def _diff_archives (archive1, archive2, **kwargs):
def _repack_archive (archive1, archive2, **kwargs):
    """Repackage archive1 into archive2, which may use a different format.

    Three cases are handled:

    - Same format and same compression: archive1 is hard-linked or copied
      to archive2 without unpacking anything.
    - Same format, both compressed (with different compressions): the
      'format' keyword passed to _handle_archive() is overridden with the
      compression names, so only the compression layer is processed.
    - Anything else: archive1 is extracted into a temporary directory and
      archive2 is created from the extracted files.

    Returns 0 on success, or 1 if the link-or-copy shortcut raised OSError.
    Errors from _handle_archive() are not caught here.
    """
    format1, compression1 = get_archive_format(archive1)
    format2, compression2 = get_archive_format(archive2)
    if format1 == format2 and compression1 == compression2:
        # Same format and compression: the file can simply be copied.
        try:
            util.link_or_copy(archive1, archive2,
                              verbose=kwargs.get('verbose'))
            return 0
        except OSError:
            return 1
    tmpdir = util.tmpdir()
    try:
        # Truthy only when both archives are compressed; the case of equal
        # compressions has already returned above.
        same_format = (format1 == format2 and compression1 and compression2)
        if same_format:
            # only decompress since the format is the same
            kwargs['format'] = compression1
        _handle_archive(archive1, 'extract', outdir=tmpdir, **kwargs)
        # Resolve the target path before changing the working directory.
        archive = os.path.abspath(archive2)
        files = tuple(os.listdir(tmpdir))
        olddir = os.getcwd()
        os.chdir(tmpdir)
        try:
            if same_format:
                # only compress since the format is the same
                kwargs['format'] = compression2
            # The archive is created exactly once, inside the try block, so
            # the working directory is restored even when creation fails.
            _handle_archive(archive, 'create', *files, **kwargs)
        finally:
            os.chdir(olddir)
        return 0
    finally:
        shutil.rmtree(tmpdir, onerror=rmtree_log_error)

View File

@ -17,6 +17,7 @@
from __future__ import print_function
import os
import sys
import shutil
import subprocess
import mimetypes
import tempfile
@ -477,4 +478,18 @@ def is_same_filename (filename1, filename2):
"""Check if filename1 and filename2 are the same filename."""
return os.path.realpath(filename1) == os.path.realpath(filename2)
def link_or_copy(src, dst, verbose=False):
    """Try to make a hard link from src to dst and if that fails
    copy the file. Hard links save some disk space and linking
    should fail fast since no copying is involved.

    @param src: path of the existing file
    @param dst: path of the link or copy to create
    @param verbose: if true, log the operation with log_info() first
    @return: None
    @raise: whatever shutil.copy() raises if the fallback copy fails too
    """
    if verbose:
        log_info("Copying %s -> %s" % (src, dst))
    try:
        os.link(src, dst)
    except (OSError, AttributeError):
        # OSError: the link could not be created.
        # AttributeError: os.link is not available on this platform.
        # Either way fall back to a plain copy.
        shutil.copy(src, dst)
# NOTE(review): this looks like a module-level call, i.e. it would run when
# the module is imported; init_mimedb() is defined outside this view, so
# confirm what it sets up before moving or removing this line.
init_mimedb()

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2010-2012 Bastian Kleineidam
# Copyright (C) 2010-2013 Bastian Kleineidam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -21,17 +21,33 @@ from . import datadir, needs_program, needs_one_program
class ArchiveRepackTest (unittest.TestCase):
    """Tests for the "repack" command of patoolib.handle_archive()."""

    def repack(self, name1, name2):
        """Repack archive with name1 to archive with name2.

        name1 is looked up in datadir; name2 is created inside a fresh
        temporary directory which is removed afterwards. Both the repack
        and the following diff must return 0.
        """
        archive1 = os.path.join(datadir, name1)
        tmpdir = patoolib.util.tmpdir()
        try:
            archive2 = os.path.join(tmpdir, name2)
            res = patoolib.handle_archive(archive1, "repack", archive2)
            self.assertEqual(res, 0)
            res = patoolib.handle_archive(archive1, "diff", archive2)
            # both archives have the same data
            self.assertEqual(res, 0)
        finally:
            shutil.rmtree(tmpdir)

    @needs_program('diff')
    @needs_one_program(('tar', 'star', '7z'))
    @needs_one_program(('zip', '7z'))
    def test_repack (self):
        """Repack between two different archive formats."""
        # The whole repack/diff/cleanup sequence lives in self.repack();
        # running an inline copy first would repack the archive twice.
        self.repack('t.tar', 't.zip')

    @needs_program('diff')
    @needs_one_program(('gzip', '7z'))
    @needs_one_program(('bzip2', '7z'))
    def test_repack_same_format_different_compression (self):
        """Repack a TAR archive from gzip to bzip2 compression."""
        self.repack('t.tar.gz', 't.tar.bz2')

    @needs_program('diff')
    def test_repack_same_format (self):
        """Repack to the same format and compression."""
        self.repack('t.tar.gz', 't1.tar.gz')
        self.repack('t.zip', 't1.zip')