From 69ce2740ca10cc5e22fd5974ef0940c467a38c28 Mon Sep 17 00:00:00 2001
From: Bastian Kleineidam
Date: Thu, 21 Feb 2013 17:51:34 +0100
Subject: [PATCH] Optimize repack command.

---
Review notes (text in this position is ignored by git-am and is not part
of the commit message):
 * link_or_copy() now also falls back to copying when os.link does not
   exist (AttributeError, e.g. Windows with Python < 3.2).
 * _repack_archive() catches EnvironmentError so that an IOError raised
   by shutil.copy on Python 2 also yields exit status 1.
 * The post-image blob ids in the "index" lines predate these amendments.
 * Line breaks and indentation were restored from the hunk line counts;
   verify blank context lines against the tree before applying.

 doc/changelog.txt    |  2 ++
 patoolib/__init__.py | 22 +++++++++++++++++++++-
 patoolib/util.py     | 15 +++++++++++++++
 tests/test_repack.py | 38 +++++++++++++++++++++++++++-----------
 4 files changed, 65 insertions(+), 12 deletions(-)

diff --git a/doc/changelog.txt b/doc/changelog.txt
index 2e165bc..60c848f 100644
--- a/doc/changelog.txt
+++ b/doc/changelog.txt
@@ -1,6 +1,8 @@
 0.19 "" (released xx.xx.2013)
 
 * Support the lzma module in Python >= 3.3
+* Optimize repacking of archives with the same format.
+  Closes: GH bug #1
 
 
 0.18 "Skyfall" (released 15.12.2012)
diff --git a/patoolib/__init__.py b/patoolib/__init__.py
index 62aae81..5e7681f 100644
--- a/patoolib/__init__.py
+++ b/patoolib/__init__.py
@@ -580,13 +580,33 @@ def _diff_archives (archive1, archive2, **kwargs):
 
 def _repack_archive (archive1, archive2, **kwargs):
     """Repackage an archive to a different format."""
+    format1, compression1 = get_archive_format(archive1)
+    format2, compression2 = get_archive_format(archive2)
+    if format1 == format2 and compression1 == compression2:
+        # same format and compression: the file can simply be linked or copied
+        try:
+            util.link_or_copy(archive1, archive2, verbose=kwargs.get('verbose'))
+            return 0
+        except EnvironmentError:
+            return 1
     tmpdir = util.tmpdir()
     try:
+        same_format = (format1 == format2 and compression1 and compression2)
+        if same_format:
+            # only decompress since the format is the same
+            kwargs['format'] = compression1
         _handle_archive(archive1, 'extract', outdir=tmpdir, **kwargs)
         archive = os.path.abspath(archive2)
         files = tuple(os.listdir(tmpdir))
+        olddir = os.getcwd()
         os.chdir(tmpdir)
-        _handle_archive(archive, 'create', *files, **kwargs)
+        try:
+            if same_format:
+                # only compress since the format is the same
+                kwargs['format'] = compression2
+            _handle_archive(archive, 'create', *files, **kwargs)
+        finally:
+            os.chdir(olddir)
         return 0
     finally:
         shutil.rmtree(tmpdir, onerror=rmtree_log_error)
diff --git a/patoolib/util.py b/patoolib/util.py
index e21559b..1601479 100644
--- a/patoolib/util.py
+++ b/patoolib/util.py
@@ -17,6 +17,7 @@
 from __future__ import print_function
 import os
 import sys
+import shutil
 import subprocess
 import mimetypes
 import tempfile
@@ -477,4 +478,18 @@ def is_same_filename (filename1, filename2):
     """Check if filename1 and filename2 are the same filename."""
     return os.path.realpath(filename1) == os.path.realpath(filename2)
 
+
+def link_or_copy(src, dst, verbose=False):
+    """Try to make a hard link from src to dst and if that fails, or
+    hard links are unavailable (no os.link), copy the file. Hard links
+    save disk space and fail fast since no copying is involved.
+    """
+    if verbose:
+        log_info("Copying %s -> %s" % (src, dst))
+    try:
+        os.link(src, dst)
+    except (AttributeError, OSError):
+        shutil.copy(src, dst)
+
+
 init_mimedb()
diff --git a/tests/test_repack.py b/tests/test_repack.py
index b455a73..0b35e8c 100644
--- a/tests/test_repack.py
+++ b/tests/test_repack.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright (C) 2010-2012 Bastian Kleineidam
+# Copyright (C) 2010-2013 Bastian Kleineidam
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -21,17 +21,33 @@ from . import datadir, needs_program, needs_one_program
 
 class ArchiveRepackTest (unittest.TestCase):
 
+    def repack(self, name1, name2):
+        """Repack archive with name1 to archive with name2."""
+        archive1 = os.path.join(datadir, name1)
+        tmpdir = patoolib.util.tmpdir()
+        try:
+            archive2 = os.path.join(tmpdir, name2)
+            res = patoolib.handle_archive(archive1, "repack", archive2)
+            self.assertEqual(res, 0)
+            res = patoolib.handle_archive(archive1, "diff", archive2)
+            # both archives have the same data
+            self.assertEqual(res, 0)
+        finally:
+            shutil.rmtree(tmpdir)
+
     @needs_program('diff')
     @needs_one_program(('tar', 'star', '7z'))
     @needs_one_program(('zip', '7z'))
     def test_repack (self):
-        archive1 = os.path.join(datadir, "t.tar")
-        tmpdir = patoolib.util.tmpdir()
-        try:
-            archive2 = os.path.join(tmpdir, "t.zip")
-            patoolib.handle_archive(archive1, "repack", archive2)
-            res = patoolib.handle_archive(archive1, "diff", archive2)
-        finally:
-            shutil.rmtree(tmpdir)
-        # both archives have the same data
-        self.assertEqual(res, 0)
+        self.repack('t.tar', 't.zip')
+
+    @needs_program('diff')
+    @needs_one_program(('gzip', '7z'))
+    @needs_one_program(('bzip2', '7z'))
+    def test_repack_same_format_different_compression (self):
+        self.repack('t.tar.gz', 't.tar.bz2')
+
+    @needs_program('diff')
+    def test_repack_same_format (self):
+        self.repack('t.tar.gz', 't1.tar.gz')
+        self.repack('t.zip', 't1.zip')