Add new recompress command

This commit is contained in:
Bastian Kleineidam 2015-07-19 17:29:33 +02:00
parent 93f0caeecc
commit bd111dc7e2
13 changed files with 242 additions and 54 deletions

View File

@ -1,9 +1,12 @@
1.8 (released xx.xx.xxxx)
1.8 (released 19.7.2015)
* Added support for ZPAQ archives.
Closes: GH bug #14
* Use options for maximum compression when creating archives
and the archive program has such options.
* Added a new "recompress" command which tries to reduce the
size of the compressed archive.
* Add BZIP2 creation to 7z and 7za commands.
1.7 (released 27.6.2014)

View File

@ -15,9 +15,10 @@
.SH NAME
patool - portable archive file manager
.SH SYNOPSIS
\fBpatool\fP [\fIglobal-options\fP] (\fBlist\fP|\fBtest\fP|\fBextract\fP|\fBcreate\fP|\fBdiff\fP|\fBsearch\fP|\fBrepack\fP|\fBformats\fP) [\fIcommand-options\fP] <\fIcommand-arguments\fP>...
\fBpatool\fP [\fIglobal-options\fP] (\fBlist\fP|\fBtest\fP|\fBextract\fP|\fBcreate\fP|\fBdiff\fP|\fBsearch\fP|\fBrepack\fP|\fBrecompress\fP|\fBformats\fP) [\fIcommand-options\fP] <\fIcommand-arguments\fP>...
.SH DESCRIPTION
Various archive formats can be created, extracted, tested, listed, searched, repacked and compared by
Various archive formats can be created, extracted, tested, listed, searched,
repacked, recompressed and compared by
\fBpatool\fP. The advantage of patool is its simplicity in handling archive
files without having to remember a myriad of programs and options.
.PP
@ -44,6 +45,7 @@ installed.
\fBpatool diff release1.0.tar.xz release2.0.zip\fP
\fBpatool search "def urlopen" python-3.3.tar.gz\fP
\fBpatool repack linux-2.6.33.tar.gz linux-2.6.33.tar.bz2\fP
\fBpatool recompress images.zip\fP
.SH GLOBAL OPTIONS
.TP
\fB\-v\fP, \fB\-\-verbose\fP
@ -117,6 +119,11 @@ with the \fBGREP_OPTIONS\fP environment variable.
.PP
Repackage archive to a different format. The target archive format is
determined by the file extension of \fIarchive_new\fP.
.SS recompress
\fBpatool\fP \fBrecompress\fP <\fIarchive\fP>
.PP
Recompress archive to a smaller file size. If the resulting file is
not smaller, the archive is left unchanged.
.SS formats
\fBpatool\fP \fBformats\fP
.PP

View File

@ -7,14 +7,15 @@ NAME
SYNOPSIS
patool [global-options] (list|test|extract|cre
ate|diff|search|repack|formats) [command-options] <command-
arguments>...
ate|diff|search|repack|recompress|formats) [command-options]
<command-arguments>...
DESCRIPTION
Various archive formats can be created, extracted, tested,
listed, searched, repacked and compared by patool. The advan
tage of patool is its simplicity in handling archive files
without having to remember a myriad of programs and options.
listed, searched, repacked, recompressed and compared by
patool. The advantage of patool is its simplicity in handling
archive files without having to remember a myriad of programs
and options.
The archive format is determined by the file(1) program and as
a fallback by the archive file extension.
@ -25,9 +26,9 @@ DESCRIPTION
FLAC (.flac), GZIP (.gz), ISO (.iso), LRZIP (.lrz), LZH (.lha,
.lzh), LZIP (.lz), LZMA (.lzma), LZOP (.lzo), RPM (.rpm), RAR
(.rar), RZIP (.rz), SHN (.shn), TAR (.tar), XZ (.xz), ZIP
(.zip, .jar) and ZOO (.zoo) formats. It relies on helper
applications to handle those archive formats (for example bzip2
for BZIP2 archives).
(.zip, .jar), ZOO (.zoo) and ZPAQ (.zpaq) formats. It relies
on helper applications to handle those archive formats (for
example bzip2 for BZIP2 archives).
The archive formats TAR, ZIP, BZIP2 and GZIP are supported
natively and do not require helper applications to be
@ -41,6 +42,7 @@ EXAMPLES
patool diff release1.0.tar.xz release2.0.zip
patool search "def urlopen" python-3.3.tar.gz
patool repack linux-2.6.33.tar.gz linux-2.6.33.tar.bz2
patool recompress images.zip
GLOBAL OPTIONS
-v, --verbose
@ -122,6 +124,12 @@ COMMANDS
Repackage archive to a different format. The target archive
format is determined by the file extension of archive_new.
recompress
patool recompress <archive>
Recompress archive to a smaller file size. If the resulting
file is not smaller, the archive is left unchanged.
formats
patool formats

17
patool
View File

@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2010-2014 Bastian Kleineidam
# Copyright (C) 2010-2015 Bastian Kleineidam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -104,6 +104,17 @@ def run_repack(args):
return res
def run_recompress(args):
    """Run the "recompress" command on the archive given in args.

    Calls patoolib.recompress_archive() with the archive name and the
    verbosity taken from args. A PatoolError is logged with log_error()
    and turned into exit code 1; otherwise 0 is returned.
    """
    archive = args.archive
    try:
        patoolib.recompress_archive(archive, verbosity=args.verbosity)
    except PatoolError as err:
        log_error("error recompressing %s: %s" % (archive, err))
        return 1
    return 0
def run_formats (args):
"""List supported and available archive formats."""
patoolib.list_formats()
@ -132,6 +143,7 @@ EXAMPLES
patool diff release1.0.tar.xz release2.0.zip
patool search "def urlopen" python-3.3.tar.gz
patool repack linux-2.6.33.tar.gz linux-2.6.33.tar.bz2
patool recompress images.zip
"""
Version = """\
@ -164,6 +176,9 @@ def create_argparser():
parser_repack = subparsers.add_parser('repack', help='repack an archive to a different format')
parser_repack.add_argument('archive_src', help='source archive file')
parser_repack.add_argument('archive_dst', help='target archive file')
# recompress
parser_recompress = subparsers.add_parser('recompress', help='recompress an archive to smaller size')
parser_recompress.add_argument('archive', help='an archive file')
# diff
parser_diff = subparsers.add_parser('diff', help='show differences between two archives')
parser_diff.add_argument('archive1', help='the first archive file')

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2010-2014 Bastian Kleineidam
# Copyright (C) 2010-2015 Bastian Kleineidam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -24,7 +24,8 @@ import importlib
# PEP 396
from .configuration import Version as __version__
__all__ = ['list_formats', 'list_archive', 'extract_archive', 'test_archive',
'create_archive', 'diff_archives', 'search_archive', 'repack_archive']
'create_archive', 'diff_archives', 'search_archive', 'repack_archive',
'recompress_archive']
# Supported archive commands
@ -128,10 +129,11 @@ ArchivePrograms = {
'list': ('nomarch',),
},
'bzip2': {
'extract': ('pbzip2', 'lbzip2', 'bzip2', '7z', '7za', 'py_bz2'),
'test': ('pbzip2', 'lbzip2', 'bzip2', '7z', '7za'),
None: ('7z', '7za'),
'extract': ('pbzip2', 'lbzip2', 'bzip2', 'py_bz2'),
'test': ('pbzip2', 'lbzip2', 'bzip2'),
'create': ('pbzip2', 'lbzip2', 'bzip2', 'py_bz2'),
'list': ('py_echo', '7z', '7za'),
'list': ('py_echo'),
},
'cab': {
'extract': ('cabextract', '7z'),
@ -483,7 +485,6 @@ def _extract_archive(archive, verbosity=0, outdir=None, program=None, format=Non
pass
def _create_archive(archive, filenames, verbosity=0, program=None, format=None, compression=None):
"""Create an archive."""
if format is None:
@ -620,6 +621,44 @@ def _repack_archive (archive1, archive2, verbosity=0):
shutil.rmtree(tmpdir, onerror=rmtree_log_error)
def _recompress_archive(archive, verbosity=0):
    """Try to recompress an archive to smaller size.

    The archive is extracted into one temporary directory and packed
    again, using the same format, into a second temporary directory.
    The new file replaces the original only if it is strictly smaller.
    Both temporary directories are removed before returning.

    Returns a status message describing the outcome.
    """
    format, compression = get_archive_format(archive)
    if compression:
        # only recompress the compression itself (eg. for .tar.xz)
        format = compression
    base, ext = os.path.splitext(os.path.basename(archive))
    tmpdir = util.tmpdir()
    tmpdir2 = None
    try:
        # Set up the second directory inside the try block so that the
        # first one is cleaned up even if this setup fails.
        tmpdir2 = util.tmpdir()
        archive2 = util.get_single_outfile(tmpdir2, base, extension=ext)
        # extract
        path = _extract_archive(archive, verbosity=verbosity, format=format,
                                outdir=tmpdir)
        # compress to new file; the file names passed to the archive
        # program are relative to the extraction directory
        olddir = os.getcwd()
        os.chdir(path)
        try:
            files = tuple(os.listdir(path))
            _create_archive(archive2, files, verbosity=verbosity, format=format)
        finally:
            os.chdir(olddir)
        # check file sizes and replace if new file is smaller
        filesize = util.get_filesize(archive)
        filesize2 = util.get_filesize(archive2)
        if filesize2 < filesize:
            # Move the new file directly over the old one instead of
            # deleting the original first: if the move fails early the
            # original archive is still in place.
            shutil.move(archive2, archive)
            diffsize = filesize - filesize2
            return "... recompressed file is now %s smaller." % util.strsize(diffsize)
    finally:
        shutil.rmtree(tmpdir, onerror=rmtree_log_error)
        if tmpdir2 is not None:
            shutil.rmtree(tmpdir2, onerror=rmtree_log_error)
    return "... recompressed file is not smaller, leaving archive as is."
# the patool library API
def extract_archive(archive, verbosity=0, outdir=None, program=None):
@ -696,3 +735,15 @@ def repack_archive (archive, archive_new, verbosity=0):
if verbosity >= 0:
util.log_info("... repacking successful.")
return res
def recompress_archive(archive, verbosity=0):
    """Recompress the given archive, hoping for a smaller file.

    The archive name is first validated with util.check_existing_filename()
    and util.check_writable_filename(). Progress and the result message of
    the recompression are logged with util.log_info() unless verbosity is
    negative. Always returns 0; failures are reported by exceptions raised
    from the called helpers.
    """
    util.check_existing_filename(archive)
    util.check_writable_filename(archive)
    verbose = verbosity >= 0
    if verbose:
        util.log_info("Recompressing %s ..." % (archive,))
    message = _recompress_archive(archive, verbosity=verbosity)
    if verbose and message:
        util.log_info(message)
    return 0

View File

@ -1,4 +1,4 @@
# Copyright (C) 2013-2014 Bastian Kleineidam
# Copyright (C) 2013-2015 Bastian Kleineidam
"""
Define basic configuration data like version or application name.
"""
@ -10,7 +10,7 @@ AppName = configdata.name
App = AppName+" "+Version
Author = configdata.author
Maintainer = configdata.maintainer
Copyright = "Copyright (C) 2004-2014 " + Author
Copyright = "Copyright (C) 2004-2015 " + Author
Url = configdata.url
SupportUrl = Url + "issues"
Email = configdata.author_email

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2010-2014 Bastian Kleineidam
# Copyright (C) 2010-2015 Bastian Kleineidam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -45,4 +45,5 @@ from .p7zip import \
test_7z, \
create_7z, \
create_zip, \
create_gzip
create_gzip, \
create_bzip2

View File

@ -84,11 +84,34 @@ test_bzip2 = \
def create_7z(archive, compression, cmd, verbosity, filenames):
"""Create a 7z archive."""
cmdlist = [cmd, 'a', '-mx=9', '--', archive]
cmdlist = [cmd, 'a', '-t7z', '-mx=9', '--', archive]
cmdlist.extend(filenames)
return cmdlist
create_zip = \
create_xz = \
create_gzip = \
create_7z
def create_zip(archive, compression, cmd, verbosity, filenames):
    """Return the command list that creates a ZIP archive.

    The list starts with cmd, followed by the "a" command, the -tzip and
    -mx=9 switches, the "--" separator, the archive name and finally all
    filenames. The compression and verbosity arguments are not used.
    """
    switches = ['a', '-tzip', '-mx=9', '--']
    return [cmd] + switches + [archive] + list(filenames)
def create_xz(archive, compression, cmd, verbosity, filenames):
    """Return the command list that creates an XZ archive.

    The list starts with cmd, followed by the "a" command, the -txz and
    -mx=9 switches, the "--" separator, the archive name and finally all
    filenames. The compression and verbosity arguments are not used.
    """
    switches = ['a', '-txz', '-mx=9', '--']
    return [cmd] + switches + [archive] + list(filenames)
def create_gzip(archive, compression, cmd, verbosity, filenames):
    """Return the command list that creates a GZIP archive.

    The list starts with cmd, followed by the "a" command, the -tgzip and
    -mx=9 switches, the "--" separator, the archive name and finally all
    filenames. The compression and verbosity arguments are not used.
    """
    switches = ['a', '-tgzip', '-mx=9', '--']
    return [cmd] + switches + [archive] + list(filenames)
def create_bzip2(archive, compression, cmd, verbosity, filenames):
    """Return the command list that creates a BZIP2 archive.

    The list starts with cmd, followed by the "a" command, the -tbzip2 and
    -mx=9 switches, the "--" separator, the archive name and finally all
    filenames. The compression and verbosity arguments are not used.
    """
    switches = ['a', '-tbzip2', '-mx=9', '--']
    return [cmd] + switches + [archive] + list(filenames)

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2010-2014 Bastian Kleineidam
# Copyright (C) 2010-2015 Bastian Kleineidam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -23,6 +23,7 @@ import mimetypes
import tempfile
import time
import traceback
import locale
from . import configuration, ArchiveMimetypes, ArchiveCompressions
try:
from shutil import which
@ -283,7 +284,12 @@ def guess_mime_file (filename):
# ignore errors, as file(1) is only a fallback
return mime, encoding
mime2 = outparts[0].split(" ", 1)[0]
if mime2 in ('application/x-empty', 'application/octet-stream'):
# Some file(1) implementations return an empty or unknown mime type
# when the uncompressor program is not installed, other
# implementations return the original file type.
# The following detects both cases.
if (mime2 in ('application/x-empty', 'application/octet-stream') or
mime2 in Mime2Encoding):
# The uncompressor program file(1) uses is not installed
# or is not able to uncompress.
# Try to get mime information from the file extension.
@ -382,6 +388,12 @@ def check_existing_filename (filename, onlyfiles=True):
raise PatoolError("`%s' is not a file" % filename)
def check_writable_filename(filename):
    """Ensure that the given filename is writable.

    Uses os.access() with os.W_OK and raises PatoolError if that check
    fails. Returns None otherwise.
    """
    if os.access(filename, os.W_OK):
        return
    raise PatoolError("file `%s' is not writable" % filename)
def check_new_filename (filename):
"""Check that filename does not already exist."""
if os.path.exists(filename):
@ -410,6 +422,31 @@ def set_mode (filename, flags):
log_error("could not set mode flags for `%s': %s" % (filename, msg))
def get_filesize(filename):
    """Return the size of the given file in bytes.

    Errors from the underlying stat call (for example a missing file)
    are not caught here and propagate to the caller as OSError.
    """
    return os.stat(filename).st_size
def strsize(b, grouping=True):
    """Return human representation of bytes b.

    Values below 1024 are shown as bytes ("B"), larger ones in "KB",
    "MB" or "GB" based on powers of 1024. Numbers are formatted with the
    locale module; grouping is passed on to enable locale specific digit
    grouping.

    Raises ValueError for a negative number of bytes.
    """
    if b < 0:
        raise ValueError("Invalid negative byte number")
    kbyte = 1024
    mbyte = kbyte * 1024
    gbyte = mbyte * 1024
    # locale.format_string() is used instead of locale.format(), which
    # was deprecated and later removed from the standard library.
    if b < kbyte:
        return u"%sB" % locale.format_string("%d", b, grouping)
    if b < kbyte * 10:
        return u"%sKB" % locale.format_string("%d", (b // kbyte), grouping)
    if b < mbyte:
        return u"%sKB" % locale.format_string("%.2f", (float(b) / kbyte), grouping)
    if b < mbyte * 10:
        return u"%sMB" % locale.format_string("%.2f", (float(b) / mbyte), grouping)
    if b < gbyte:
        return u"%sMB" % locale.format_string("%.1f", (float(b) / mbyte), grouping)
    if b < gbyte * 10:
        return u"%sGB" % locale.format_string("%.2f", (float(b) / gbyte), grouping)
    return u"%sGB" % locale.format_string("%.1f", (float(b) / gbyte), grouping)
def tmpdir (dir=None):
    """Create a temporary directory for extraction and return its path.

    The directory is made with tempfile.mkdtemp() using the name prefix
    "Unpack_"; dir is passed through as the parent directory argument.
    """
    options = dict(suffix='', prefix='Unpack_', dir=dir)
    return tempfile.mkdtemp(**options)

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2010-2014 Bastian Kleineidam
# Copyright (C) 2010-2015 Bastian Kleineidam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -59,6 +59,7 @@ class Test7z (ArchiveTest):
self.archive_test('t.deb')
self.archive_test('t.iso')
self.archive_create('t.txt.gz', check=Content.Singlefile)
self.archive_create('t.txt.bz2', check=Content.Singlefile)
@needs_codec(program, 'rar')
def test_7z_rar (self):

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2010-2014 Bastian Kleineidam
# Copyright (C) 2010-2015 Bastian Kleineidam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -41,6 +41,7 @@ class Test7za (ArchiveTest):
self.archive_test('t.txt.Z')
self.archive_test('t.cab')
self.archive_create('t.txt.gz', check=Content.Singlefile)
self.archive_create('t.txt.bz2', check=Content.Singlefile)
@needs_program('file')
@needs_program(program)

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2010-2014 Bastian Kleineidam
# Copyright (C) 2010-2015 Bastian Kleineidam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -125,7 +125,7 @@ class TestMime (unittest.TestCase):
self.mime_test_file("t.chm.foo", "application/x-chm")
self.mime_test_file("t.iso", "application/x-iso9660-image")
self.mime_test_file("t.epub", "application/zip")
self.mime_test_file("t.apk", "application/zip")
self.mime_test_file("t.apk", ("application/zip", "application/java-archive"))
self.mime_test_file("t.zpaq", "application/zpaq")
self.mime_test_file("t.zpaq.foo", "application/zpaq")
@ -182,6 +182,7 @@ class TestMime (unittest.TestCase):
self.mime_test_mimedb("t.tbz2", "application/x-tar", "bzip2")
self.mime_test_mimedb("t.tgz", "application/x-tar", "gzip")
self.mime_test_mimedb("t.txt.gz", "application/gzip")
self.mime_test_mimedb("t.txt.bz2", "application/x-bzip2")
self.mime_test_mimedb("t .xz", "application/x-xz")
self.mime_test_mimedb("t.Z", "application/x-compress")
self.mime_test_mimedb("t.zip", ("application/zip", "application/x-zip-compressed"))

40
tests/test_recompress.py Normal file
View File

@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2010-2015 Bastian Kleineidam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import unittest
import os
import sys
import shutil
from patoolib import util
from . import datadir, needs_one_program, patool_cmd
class ArchiveRecompressTest (unittest.TestCase):
    """Run the patool "recompress" command on copies of test archives."""

    def recompress(self, name):
        """Recompress archive with given name.

        The archive from the test data directory is copied to a temporary
        file first, so the recompress command never modifies the test
        data itself. The temporary file is removed afterwards.
        """
        archive = os.path.join(datadir, name)
        ext = os.path.splitext(archive)[1]
        tmpfile = util.tmpfile(suffix=ext)
        try:
            shutil.copy(archive, tmpfile)
            util.run_checked([sys.executable, patool_cmd, "-vv", "recompress", tmpfile])
        finally:
            if os.path.exists(tmpfile):
                os.remove(tmpfile)

    @needs_one_program(('zip', '7z'))
    def test_recompress (self):
        """Recompress a ZIP archive."""
        self.recompress('t.zip')