From 45a5f7e8413dd99063940123b3be03c80e829120 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Fri, 11 May 2012 22:08:00 +0200 Subject: [PATCH] Support extraction of BZIP2 files with the Python bz2 module. --- doc/changelog.txt | 2 ++ patoolib/__init__.py | 8 +++++-- patoolib/programs/pybz2.py | 46 ++++++++++++++++++++++++++++++++++++++ patoolib/util.py | 10 ++++++++- tests/test_archives.py | 4 ++++ tests/test_util.py | 2 +- 6 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 patoolib/programs/pybz2.py diff --git a/doc/changelog.txt b/doc/changelog.txt index e0b0e13..bc92b25 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -11,6 +11,8 @@ * Added support for the lbzip2 program handling BZIP2 archives. * Added support for the plzip program handling LZIP archives. * Prevent overwriting the same file with repack. +* Support extraction of BZIP2 files with the Python bz2 module. + 0.15 "Contraband" (released 8.4.2012) diff --git a/patoolib/__init__.py b/patoolib/__init__.py index b705b9b..d4df88c 100644 --- a/patoolib/__init__.py +++ b/patoolib/__init__.py @@ -100,7 +100,7 @@ ArchivePrograms = { }, 'bzip2': { None: ('7z', '7za'), - 'extract': ('pbzip2', 'lbzip2', 'bzip2'), + 'extract': ('pbzip2', 'lbzip2', 'bzip2', 'pybz2'), 'test': ('pbzip2', 'lbzip2', 'bzip2'), 'create': ('pbzip2', 'lbzip2', 'bzip2'), 'list': ('echo',), @@ -452,7 +452,11 @@ def _handle_archive (archive, command, *args, **kwargs): archive = util.tmpfile(dir=os.path.dirname(archive), suffix=".arc") try: cmdlist = get_archive_cmdlist(archive, encoding, program, *args, **cmd_kwargs) - run_archive_cmdlist(cmdlist) + if cmdlist: + # an empty command list means the get_archive_cmdlist() function + # already handled the command (eg. 
when it's a builtin Python + # function) + run_archive_cmdlist(cmdlist) if command == 'extract': if do_cleanup_outdir: target, msg = cleanup_outdir(cmd_kwargs["outdir"]) diff --git a/patoolib/programs/pybz2.py b/patoolib/programs/pybz2.py new file mode 100644 index 0000000..fa0c88d --- /dev/null +++ b/patoolib/programs/pybz2.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2012 Bastian Kleineidam +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +"""Archive commands for the bz2 Python module.""" +from patoolib import util +try: + # try external bz2file module with multi-stream support + import bz2file as bz2 +except ImportError: + import bz2 + +READ_SIZE_BYTES = 1024*1024 + +def extract_bzip2 (archive, encoding, cmd, **kwargs): + """Extract a BZIP2 archive with the bz2 Python module functionality.""" + verbose = kwargs['verbose'] + if verbose: + util.log_info('extracting %s...' % archive) + targetname = util.get_single_outfile(kwargs['outdir'], archive) + bz2file = bz2.BZ2File(archive) + try: + targetfile = open(targetname, 'wb') + try: + data = bz2file.read(READ_SIZE_BYTES) + while data: + targetfile.write(data) + data = bz2file.read(READ_SIZE_BYTES) + finally: + targetfile.close() + finally: + bz2file.close() + if verbose: + util.log_info('... 
extracted to %s' % targetname) + return None diff --git a/patoolib/util.py b/patoolib/util.py index 3a9f78d..f362c48 100644 --- a/patoolib/util.py +++ b/patoolib/util.py @@ -332,9 +332,17 @@ def stripext (filename): def get_single_outfile (directory, archive): """Get output filename if archive is in a single file format like gzip.""" outfile = os.path.join(directory, stripext(archive)) - if archive == outfile: + if is_same_filename(archive, outfile): # prevent overwriting the archive itself outfile += ".raw" + if os.path.exists(outfile): + # prevent overwriting existing files + i = 1 + newfile = "%s%d" % (outfile, i) + while os.path.exists(newfile): + newfile = "%s%d" % (outfile, i) + i += 1 + outfile = newfile return outfile diff --git a/tests/test_archives.py b/tests/test_archives.py index 77922c0..d863112 100644 --- a/tests/test_archives.py +++ b/tests/test_archives.py @@ -103,6 +103,10 @@ class TestArchives (ArchiveTest): self.archive_test('t .bz2') self.archive_create('t .bz2', singlefile=True) + def test_pybz2 (self): + self.program = 'pybz2' + self.archive_extract('t .bz2') + @needs_program('pbzip2') def test_pbzip2 (self): self.program = 'pbzip2' diff --git a/tests/test_util.py b/tests/test_util.py index 3df2891..94a836e 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2010-2011 Bastian Kleineidam +# Copyright (C) 2010-2012 Bastian Kleineidam # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by