diff --git a/doc/changelog.txt b/doc/changelog.txt index 2a67355..cd2528d 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -1,9 +1,12 @@ -1.8 (released xx.xx.xxxx) +1.8 (released 19.7.2015) * Added support for ZPAQ archives. Closes: GH bug #14 * Use options for maximum compression when creating archives and the archive program has such options. +* Added a new "recompress" command which tries to reduce the + size of the compressed archive. +* Add BZIP2 creation to 7z and 7za commands. 1.7 (released 27.6.2014) diff --git a/doc/patool.1 b/doc/patool.1 index 9a5682e..7303429 100644 --- a/doc/patool.1 +++ b/doc/patool.1 @@ -15,9 +15,10 @@ .SH NAME patool - portable archive file manager .SH SYNOPSIS - \fBpatool\fP [\fIglobal-options\fP] (\fBlist\fP|\fBtest\fP|\fBextract\fP|\fBcreate\fP|\fBdiff\fP|\fBsearch\fP|\fBrepack\fP|\fBformats\fP) [\fIcommand-options\fP] <\fIcommand-arguments\fP>... + \fBpatool\fP [\fIglobal-options\fP] (\fBlist\fP|\fBtest\fP|\fBextract\fP|\fBcreate\fP|\fBdiff\fP|\fBsearch\fP|\fBrepack\fP|\fBrecompress\fP|\fBformats\fP) [\fIcommand-options\fP] <\fIcommand-arguments\fP>... .SH DESCRIPTION -Various archive formats can be created, extracted, tested, listed, searched, repacked and compared by +Various archive formats can be created, extracted, tested, listed, searched, +repacked, recompressed and compared by \fBpatool\fP. The advantage of patool is its simplicity in handling archive files without having to remember a myriad of programs and options. .PP @@ -44,6 +45,7 @@ installed. \fBpatool diff release1.0.tar.xz release2.0.zip\fP \fBpatool search "def urlopen" python-3.3.tar.gz\fP \fBpatool repack linux-2.6.33.tar.gz linux-2.6.33.tar.bz2\fP + \fBpatool recompress images.zip\fP .SH GLOBAL OPTIONS .TP \fB\-v\fP, \fB\-\-verbose\fP @@ -117,6 +119,11 @@ with the \fBGREP_OPTIONS\fP environment variable. .PP Repackage archive to a different format. The target archive format is determined by the file extension of \fIarchive_new\fP. 
+.SS recompress +\fBpatool\fP \fBrecompress\fP <\fIarchive\fP> +.PP +Recompress archive to a smaller file size. If the resulting file is +not smaller, the archive is left unchanged. .SS formats \fBpatool\fP \fBformats\fP .PP diff --git a/doc/patool.txt b/doc/patool.txt index 44cdcb0..1d0d1a5 100644 --- a/doc/patool.txt +++ b/doc/patool.txt @@ -7,30 +7,31 @@ NAME SYNOPSIS patool [global-options] (list|test|extract|cre‐ - ate|diff|search|repack|formats) [command-options] ... + ate|diff|search|repack|recompress|formats) [command-options] + ... DESCRIPTION Various archive formats can be created, extracted, tested, - listed, searched, repacked and compared by patool. The advan‐ - tage of patool is its simplicity in handling archive files - without having to remember a myriad of programs and options. + listed, searched, repacked, recompressed and compared by + patool. The advantage of patool is its simplicity in handling + archive files without having to remember a myriad of programs + and options. - The archive format is determined by the file(1) program and as + The archive format is determined by the file(1) program and as a fallback by the archive file extension. patool supports 7z (.7z), ACE (.ace), ADF (.adf), ALZIP (.alz), - APE (.ape), AR (.a), ARC (.arc), ARJ (.arj), BZIP2 (.bz2), CAB - (.cab), COMPRESS (.Z), CPIO (.cpio), DEB (.deb), DMS (.dms), - FLAC (.flac), GZIP (.gz), ISO (.iso), LRZIP (.lrz), LZH (.lha, - .lzh), LZIP (.lz), LZMA (.lzma), LZOP (.lzo), RPM (.rpm), RAR - (.rar), RZIP (.rz), SHN (.shn), TAR (.tar), XZ (.xz), ZIP - (.zip, .jar) and ZOO (.zoo) formats. It relies on helper - applications to handle those archive formats (for example bzip2 - for BZIP2 archives). 
+ APE (.ape), AR (.a), ARC (.arc), ARJ (.arj), BZIP2 (.bz2), CAB + (.cab), COMPRESS (.Z), CPIO (.cpio), DEB (.deb), DMS (.dms), + FLAC (.flac), GZIP (.gz), ISO (.iso), LRZIP (.lrz), LZH (.lha, + .lzh), LZIP (.lz), LZMA (.lzma), LZOP (.lzo), RPM (.rpm), RAR + (.rar), RZIP (.rz), SHN (.shn), TAR (.tar), XZ (.xz), ZIP + (.zip, .jar), ZOO (.zoo) and ZPAQ (.zpaq) formats. It relies + on helper applications to handle those archive formats (for + example bzip2 for BZIP2 archives). - The archive formats TAR, ZIP, BZIP2 and GZIP are supported - natively and do not require helper applications to be + The archive formats TAR, ZIP, BZIP2 and GZIP are supported + natively and do not require helper applications to be installed. EXAMPLES @@ -41,10 +42,11 @@ EXAMPLES patool diff release1.0.tar.xz release2.0.zip patool search "def urlopen" python-3.3.tar.gz patool repack linux-2.6.33.tar.gz linux-2.6.33.tar.bz2 + patool recompress images.zip GLOBAL OPTIONS -v, --verbose - Display more info about what patool does, and display + Display more info about what patool does, and display the output of helper applications. Can be given multiple times to increase the output even more. @@ -55,7 +57,7 @@ COMMANDS · The original archive will never be removed. - · Files outside the output directory will never be created. + · Files outside the output directory will never be created. This relies on archive program options to prevent unpacking of files with an absolute path name (eg. --no-abso‐ lute-filenames for cpio(1)). @@ -65,22 +67,22 @@ COMMANDS extract patool extract [--outdir directory] ... - Extract files from given archives. The original archives will + Extract files from given archives. The original archives will never be removed and are left as is. --outdir directory - Extract to the given output directory. Default is to + Extract to the given output directory. Default is to extract to the current working directory. 
- If the archive contains exactly one file or directory, the ar‐ - chive contents are extracted directly to the output directory. + If the archive contains exactly one file or directory, the ar‐ + chive contents are extracted directly to the output directory. Else the files are extracted in a newly created subdirectory of - the output directory. The new directory is named after the ar‐ + the output directory. The new directory is named after the ar‐ chive filename without the extension. - This prevents cluttering the output directory with a lot of + This prevents cluttering the output directory with a lot of files from the extracted archive. - All extracted files are ensured that they are readable by the + All extracted files are ensured that they are readable by the current user. list @@ -122,21 +124,27 @@ COMMANDS Repackage archive to a different format. The target archive format is determined by the file extension of archive_new. + recompress + patool recompress + + Recompress archive to a smaller file size. If the resulting + file is not smaller, the archive is left unchanged. + formats patool formats - Show all supported archive formats (ie. which helper applica‐ + Show all supported archive formats (ie. which helper applica‐ tions are available). HELP OPTION - Specifying the help option displays help for patool itself, or + Specifying the help option displays help for patool itself, or a command. 
For example: patool --help - display help for patool patool extract --help - display help for the extract command SHELL ALIASES - When running under a Unix shell the following aliases can be + When running under a Unix shell the following aliases can be defined to save some typing: alias pl='patool list' alias px='patool extract' diff --git a/patool b/patool index 255211e..c51a5a2 100755 --- a/patool +++ b/patool @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Copyright (C) 2010-2014 Bastian Kleineidam +# Copyright (C) 2010-2015 Bastian Kleineidam # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -104,6 +104,17 @@ def run_repack(args): return res +def run_recompress(args): + """Recompress an archive to smaller size.""" + res = 0 + try: + patoolib.recompress_archive(args.archive, verbosity=args.verbosity) + except PatoolError as msg: + log_error("error recompressing %s: %s" % (args.archive, msg)) + res = 1 + return res + + def run_formats (args): """List supported and available archive formats.""" patoolib.list_formats() @@ -132,6 +143,7 @@ EXAMPLES patool diff release1.0.tar.xz release2.0.zip patool search "def urlopen" python-3.3.tar.gz patool repack linux-2.6.33.tar.gz linux-2.6.33.tar.bz2 + patool recompress images.zip """ Version = """\ @@ -164,6 +176,9 @@ def create_argparser(): parser_repack = subparsers.add_parser('repack', help='repack an archive to a different format') parser_repack.add_argument('archive_src', help='source archive file') parser_repack.add_argument('archive_dst', help='target archive file') + # recompress + parser_recompress = subparsers.add_parser('recompress', help='recompress an archive to smaller size') + parser_recompress.add_argument('archive', help='an archive file') # diff parser_diff = subparsers.add_parser('diff', help='show differences between two archives') parser_diff.add_argument('archive1', help='the first archive 
file') diff --git a/patoolib/__init__.py b/patoolib/__init__.py index e4f0b8a..3da2534 100644 --- a/patoolib/__init__.py +++ b/patoolib/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2010-2014 Bastian Kleineidam +# Copyright (C) 2010-2015 Bastian Kleineidam # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -24,7 +24,8 @@ import importlib # PEP 396 from .configuration import Version as __version__ __all__ = ['list_formats', 'list_archive', 'extract_archive', 'test_archive', - 'create_archive', 'diff_archives', 'search_archive', 'repack_archive'] + 'create_archive', 'diff_archives', 'search_archive', 'repack_archive', + 'recompress_archive'] # Supported archive commands @@ -128,10 +129,11 @@ ArchivePrograms = { 'list': ('nomarch',), }, 'bzip2': { - 'extract': ('pbzip2', 'lbzip2', 'bzip2', '7z', '7za', 'py_bz2'), - 'test': ('pbzip2', 'lbzip2', 'bzip2', '7z', '7za'), + None: ('7z', '7za'), + 'extract': ('pbzip2', 'lbzip2', 'bzip2', 'py_bz2'), + 'test': ('pbzip2', 'lbzip2', 'bzip2'), 'create': ('pbzip2', 'lbzip2', 'bzip2', 'py_bz2'), - 'list': ('py_echo', '7z', '7za'), + 'list': ('py_echo',), }, 'cab': { 'extract': ('cabextract', '7z'), @@ -483,7 +485,6 @@ def _extract_archive(archive, verbosity=0, outdir=None, program=None, format=Non pass - def _create_archive(archive, filenames, verbosity=0, program=None, format=None, compression=None): """Create an archive.""" if format is None: @@ -620,6 +621,44 @@ def _repack_archive (archive1, archive2, verbosity=0): shutil.rmtree(tmpdir, onerror=rmtree_log_error) +def _recompress_archive(archive, verbosity=0): + """Try to recompress an archive to smaller size.""" + format, compression = get_archive_format(archive) + if compression: + # only recompress the compression itself (eg. 
for .tar.xz) + format = compression + tmpdir = util.tmpdir() + tmpdir2 = util.tmpdir() + base, ext = os.path.splitext(os.path.basename(archive)) + archive2 = util.get_single_outfile(tmpdir2, base, extension=ext) + try: + # extract + kwargs = dict(verbosity=verbosity, format=format, outdir=tmpdir) + path = _extract_archive(archive, **kwargs) + # compress to new file + olddir = os.getcwd() + os.chdir(path) + try: + kwargs = dict(verbosity=verbosity, format=format) + files = tuple(os.listdir(path)) + _create_archive(archive2, files, **kwargs) + finally: + os.chdir(olddir) + # check file sizes and replace if new file is smaller + filesize = util.get_filesize(archive) + filesize2 = util.get_filesize(archive2) + if filesize2 < filesize: + # replace file + os.remove(archive) + shutil.move(archive2, archive) + diffsize = filesize - filesize2 + return "... recompressed file is now %s smaller." % util.strsize(diffsize) + finally: + shutil.rmtree(tmpdir, onerror=rmtree_log_error) + shutil.rmtree(tmpdir2, onerror=rmtree_log_error) + return "... recompressed file is not smaller, leaving archive as is." + + # the patool library API def extract_archive(archive, verbosity=0, outdir=None, program=None): @@ -696,3 +735,15 @@ def repack_archive (archive, archive_new, verbosity=0): if verbosity >= 0: util.log_info("... repacking successful.") return res + + +def recompress_archive(archive, verbosity=0): + """Recompress an archive to hopefully smaller size.""" + util.check_existing_filename(archive) + util.check_writable_filename(archive) + if verbosity >= 0: + util.log_info("Recompressing %s ..." 
% (archive,)) + res = _recompress_archive(archive, verbosity=verbosity) + if res and verbosity >= 0: + util.log_info(res) + return 0 diff --git a/patoolib/configuration.py b/patoolib/configuration.py index 0c24d8d..7b40999 100644 --- a/patoolib/configuration.py +++ b/patoolib/configuration.py @@ -1,4 +1,4 @@ -# Copyright (C) 2013-2014 Bastian Kleineidam +# Copyright (C) 2013-2015 Bastian Kleineidam """ Define basic configuration data like version or application name. """ @@ -10,7 +10,7 @@ AppName = configdata.name App = AppName+" "+Version Author = configdata.author Maintainer = configdata.maintainer -Copyright = "Copyright (C) 2004-2014 " + Author +Copyright = "Copyright (C) 2004-2015 " + Author Url = configdata.url SupportUrl = Url + "issues" Email = configdata.author_email diff --git a/patoolib/programs/p7azip.py b/patoolib/programs/p7azip.py index ab21847..6f2beb3 100644 --- a/patoolib/programs/p7azip.py +++ b/patoolib/programs/p7azip.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2010-2014 Bastian Kleineidam +# Copyright (C) 2010-2015 Bastian Kleineidam # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -45,4 +45,5 @@ from .p7zip import \ test_7z, \ create_7z, \ create_zip, \ - create_gzip + create_gzip, \ + create_bzip2 diff --git a/patoolib/programs/p7zip.py b/patoolib/programs/p7zip.py index dfff888..af9b017 100644 --- a/patoolib/programs/p7zip.py +++ b/patoolib/programs/p7zip.py @@ -82,13 +82,36 @@ test_bzip2 = \ test_7z -def create_7z (archive, compression, cmd, verbosity, filenames): +def create_7z(archive, compression, cmd, verbosity, filenames): """Create a 7z archive.""" - cmdlist = [cmd, 'a', '-mx=9', '--', archive] + cmdlist = [cmd, 'a', '-t7z', '-mx=9', '--', archive] cmdlist.extend(filenames) return cmdlist -create_zip = \ - create_xz = \ - create_gzip = \ - create_7z + +def create_zip(archive, compression, cmd, verbosity, filenames): + 
"""Create a ZIP archive.""" + cmdlist = [cmd, 'a', '-tzip', '-mx=9', '--', archive] + cmdlist.extend(filenames) + return cmdlist + + +def create_xz(archive, compression, cmd, verbosity, filenames): + """Create an XZ archive.""" + cmdlist = [cmd, 'a', '-txz', '-mx=9', '--', archive] + cmdlist.extend(filenames) + return cmdlist + + +def create_gzip(archive, compression, cmd, verbosity, filenames): + """Create a GZIP archive.""" + cmdlist = [cmd, 'a', '-tgzip', '-mx=9', '--', archive] + cmdlist.extend(filenames) + return cmdlist + + +def create_bzip2(archive, compression, cmd, verbosity, filenames): + """Create a BZIP2 archive.""" + cmdlist = [cmd, 'a', '-tbzip2', '-mx=9', '--', archive] + cmdlist.extend(filenames) + return cmdlist diff --git a/patoolib/util.py b/patoolib/util.py index d1e29c7..a9b6f0c 100644 --- a/patoolib/util.py +++ b/patoolib/util.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2010-2014 Bastian Kleineidam +# Copyright (C) 2010-2015 Bastian Kleineidam # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -23,6 +23,7 @@ import mimetypes import tempfile import time import traceback +import locale from . import configuration, ArchiveMimetypes, ArchiveCompressions try: from shutil import which @@ -283,7 +284,12 @@ def guess_mime_file (filename): # ignore errors, as file(1) is only a fallback return mime, encoding mime2 = outparts[0].split(" ", 1)[0] - if mime2 in ('application/x-empty', 'application/octet-stream'): + # Some file(1) implementations return an empty or unknown mime type + # when the uncompressor program is not installed, other + # implementation return the original file type. + # The following detects both cases. + if (mime2 in ('application/x-empty', 'application/octet-stream') or + mime2 in Mime2Encoding): # The uncompressor program file(1) uses is not installed # or is not able to uncompress. 
# Try to get mime information from the file extension. @@ -382,6 +388,12 @@ def check_existing_filename (filename, onlyfiles=True): raise PatoolError("`%s' is not a file" % filename) +def check_writable_filename(filename): + """Ensure that the given filename is writable.""" + if not os.access(filename, os.W_OK): + raise PatoolError("file `%s' is not writable" % filename) + + def check_new_filename (filename): """Check that filename does not already exist.""" if os.path.exists(filename): @@ -410,6 +422,31 @@ def set_mode (filename, flags): log_error("could not set mode flags for `%s': %s" % (filename, msg)) +def get_filesize(filename): + """Return file size in Bytes; raises OSError if the file cannot be accessed.""" + return os.path.getsize(filename) + + +def strsize(b, grouping=True): + """Return human representation of bytes b. A negative number of bytes + raises a value error.""" + if b < 0: + raise ValueError("Invalid negative byte number") + if b < 1024: + return u"%sB" % locale.format("%d", b, grouping) + if b < 1024 * 10: + return u"%sKB" % locale.format("%d", (b // 1024), grouping) + if b < 1024 * 1024: + return u"%sKB" % locale.format("%.2f", (float(b) / 1024), grouping) + if b < 1024 * 1024 * 10: + return u"%sMB" % locale.format("%.2f", (float(b) / (1024*1024)), grouping) + if b < 1024 * 1024 * 1024: + return u"%sMB" % locale.format("%.1f", (float(b) / (1024*1024)), grouping) + if b < 1024 * 1024 * 1024 * 10: + return u"%sGB" % locale.format("%.2f", (float(b) / (1024*1024*1024)), grouping) + return u"%sGB" % locale.format("%.1f", (float(b) / (1024*1024*1024)), grouping) + + def tmpdir (dir=None): """Return a temporary directory for extraction.""" return tempfile.mkdtemp(suffix='', prefix='Unpack_', dir=dir) diff --git a/tests/archives/test_7z.py b/tests/archives/test_7z.py index 97ed4bd..48cbefe 100644 --- a/tests/archives/test_7z.py +++ b/tests/archives/test_7z.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2010-2014 Bastian Kleineidam +# Copyright (C) 2010-2015 Bastian 
Kleineidam # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -59,6 +59,7 @@ class Test7z (ArchiveTest): self.archive_test('t.deb') self.archive_test('t.iso') self.archive_create('t.txt.gz', check=Content.Singlefile) + self.archive_create('t.txt.bz2', check=Content.Singlefile) @needs_codec(program, 'rar') def test_7z_rar (self): diff --git a/tests/archives/test_7za.py b/tests/archives/test_7za.py index b2d592c..af3a442 100644 --- a/tests/archives/test_7za.py +++ b/tests/archives/test_7za.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2010-2014 Bastian Kleineidam +# Copyright (C) 2010-2015 Bastian Kleineidam # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -41,6 +41,7 @@ class Test7za (ArchiveTest): self.archive_test('t.txt.Z') self.archive_test('t.cab') self.archive_create('t.txt.gz', check=Content.Singlefile) + self.archive_create('t.txt.bz2', check=Content.Singlefile) @needs_program('file') @needs_program(program) diff --git a/tests/test_mime.py b/tests/test_mime.py index a5fa353..edfa076 100644 --- a/tests/test_mime.py +++ b/tests/test_mime.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2010-2014 Bastian Kleineidam +# Copyright (C) 2010-2015 Bastian Kleineidam # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -125,7 +125,7 @@ class TestMime (unittest.TestCase): self.mime_test_file("t.chm.foo", "application/x-chm") self.mime_test_file("t.iso", "application/x-iso9660-image") self.mime_test_file("t.epub", "application/zip") - self.mime_test_file("t.apk", "application/zip") + self.mime_test_file("t.apk", ("application/zip", "application/java-archive")) self.mime_test_file("t.zpaq", "application/zpaq") self.mime_test_file("t.zpaq.foo", "application/zpaq") 
@@ -182,6 +182,7 @@ class TestMime (unittest.TestCase): self.mime_test_mimedb("t.tbz2", "application/x-tar", "bzip2") self.mime_test_mimedb("t.tgz", "application/x-tar", "gzip") self.mime_test_mimedb("t.txt.gz", "application/gzip") + self.mime_test_mimedb("t.txt.bz2", "application/x-bzip2") self.mime_test_mimedb("t .xz", "application/x-xz") self.mime_test_mimedb("t.Z", "application/x-compress") self.mime_test_mimedb("t.zip", ("application/zip", "application/x-zip-compressed")) diff --git a/tests/test_recompress.py b/tests/test_recompress.py new file mode 100644 index 0000000..9b7ebe4 --- /dev/null +++ b/tests/test_recompress.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2010-2015 Bastian Kleineidam +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +import unittest +import os +import sys +import shutil +from patoolib import util +from . import datadir, needs_one_program, patool_cmd + +class ArchiveRecompressTest (unittest.TestCase): + + def recompress(self, name): + """Recompress archive with given name.""" + archive = os.path.join(datadir, name) + ext = os.path.splitext(archive)[1] + tmpfile = util.tmpfile(suffix=ext) + try: + shutil.copy(archive, tmpfile) + util.run_checked([sys.executable, patool_cmd, "-vv", "recompress", tmpfile]) + finally: + if os.path.exists(tmpfile): + os.remove(tmpfile) + + @needs_one_program(('zip', '7z')) + def test_repack (self): + self.recompress('t.zip') +