Improve MIME detection of LZMA and LRZIP.

This commit is contained in:
Bastian Kleineidam 2013-07-16 20:14:19 +02:00
parent 1a9344baec
commit d2b12f985e
4 changed files with 36 additions and 34 deletions

View File

@ -2,6 +2,7 @@
* Add patoolib.__version__ (see PEP 396).
Closes: GH bug #3
* Improved detection of LZMA and LRZIP files with file(1).
1.2 (released 27.6.2013)

View File

@ -21,7 +21,6 @@ import os
import shutil
import stat
import importlib
from . import util
# PEP 396
from .configuration import Version as __version__
__all__ = ['list_formats', 'list_archive', 'extract_archive', 'test_archive',
@ -272,6 +271,8 @@ ProgramModules = {
}
from . import util
def get_archive_format (filename):
"""Detect filename archive format and optional compression."""
mime, compression = util.guess_mime(filename)

View File

@ -23,7 +23,7 @@ import mimetypes
import tempfile
import time
import traceback
from . import configuration
from . import configuration, ArchiveMimetypes, ArchiveCompressions
try:
from shutil import which
except ImportError:
@ -245,7 +245,6 @@ def guess_mime_mimedb (filename):
mime, encoding = None, None
if mimedb is not None:
mime, encoding = mimedb.guess_type(filename, strict=False)
from . import ArchiveMimetypes, ArchiveCompressions
if mime not in ArchiveMimetypes and encoding in ArchiveCompressions:
# Files like 't.txt.gz' are recognized with encoding as format, and
# an unsupported mime-type like 'text/plain'. Fix this.
@ -262,7 +261,7 @@ def guess_mime_file (filename):
"""
mime, encoding = None, None
base, ext = os.path.splitext(filename)
if ext.lower() in ('.lzma', '.alz', '.lrz'):
if ext.lower() in ('.alz',):
# let mimedb recognize these extensions
return mime, encoding
if os.path.isfile(filename):
@ -271,6 +270,27 @@ def guess_mime_file (filename):
mime, encoding = guess_mime_file_mime(file_prog, filename)
if mime is None:
mime = guess_mime_file_text(file_prog, filename)
encoding = None
if mime in Mime2Encoding:
# try to look inside compressed archives
cmd = [file_prog, "--brief", "--mime", "--uncompress", filename]
try:
outparts = backtick(cmd).strip().split(";")
except OSError:
# ignore errors, as file(1) is only a fallback
return mime, encoding
mime2 = outparts[0].split(" ", 1)[0]
if mime2 in ('application/x-empty', 'application/octet-stream'):
# The decompression program that file(1) relies on is either
# not installed or was unable to decompress the file.
# Fall back to deriving the MIME type from the file extension.
mime2, encoding2 = guess_mime_mimedb(filename)
if mime2 in ArchiveMimetypes:
mime = mime2
encoding = encoding2
elif mime2 in ArchiveMimetypes:
mime = mime2
encoding = get_file_mime_encoding(outparts)
return mime, encoding
@ -284,27 +304,7 @@ def guess_mime_file_mime (file_prog, filename):
mime = backtick(cmd).strip()
except OSError:
# ignore errors, as file(1) is only a fallback
return mime, encoding
from . import ArchiveMimetypes
if mime in Mime2Encoding:
# try to look inside compressed archives
cmd = [file_prog, "--brief", "--mime", "--uncompress", filename]
try:
outparts = backtick(cmd).strip().split(";")
except OSError:
# ignore errors, as file(1) is only a fallback
return mime, encoding
mime2 = outparts[0].split(" ", 1)[0]
if mime2 == 'application/x-empty':
# The uncompressor program file(1) uses is not installed.
# Try to get mime information from the file extension.
mime2, encoding2 = guess_mime_mimedb(filename)
if mime2 in ArchiveMimetypes:
mime = mime2
encoding = encoding2
elif mime2 in ArchiveMimetypes:
mime = mime2
encoding = get_file_mime_encoding(outparts)
pass
if mime not in ArchiveMimetypes:
mime, encoding = None, None
return mime, encoding
@ -331,6 +331,8 @@ FileText2Mime = {
"ASCII cpio archive": "application/x-cpio",
"Debian binary package": "application/x-debian-package",
"gzip compressed data": "application/x-gzip",
"LZMA compressed data": "application/x-lzma",
"LRZIP compressed data": "application/x-lrzip",
"lzop compressed data": "application/x-lzop",
"Microsoft Cabinet archive data": "application/vnd.ms-cab-compressed",
"RAR archive data": "application/x-rar",
@ -361,7 +363,7 @@ def guess_mime_file_text (file_prog, filename):
return None
# match output against known strings
for matcher, mime in FileText2Mime.items():
if output.startswith(matcher):
if output.startswith(matcher) and mime in ArchiveMimetypes:
return mime
return None

View File

@ -61,9 +61,8 @@ class TestMime (unittest.TestCase):
self.mime_test_file("t.txt.gz.foo", ("application/gzip", "application/x-gzip"))
self.mime_test_file("t.jar", "application/zip")
self.mime_test_file("t.jar.foo", "application/zip")
# file(1) does not recognize .lzma files
#self.mime_test_file("t.lzma", "application/x-lzma")
#self.mime_test_file("t.lzma.foo", "application/x-lzma")
self.mime_test_file("t.txt.lzma", "application/x-lzma")
self.mime_test_file("t.txt.lzma.foo", "application/x-lzma")
self.mime_test_file("t.txt.lz", "application/x-lzip")
self.mime_test_file("t.txt.lz.foo", "application/x-lzip")
self.mime_test_file("t.txt.lzo", "application/x-lzop")
@ -82,8 +81,8 @@ class TestMime (unittest.TestCase):
self.mime_test_file("t.tgz", "application/x-tar", "gzip")
self.mime_test_file("t.tar.xz", "application/x-tar", "xz")
self.mime_test_file("t.tar.Z", "application/x-tar", "compress")
# file(1) does not recognize .lzma files
#self.mime_test_file("t.tar.lzma", "application/x-tar", "lzma")
self.mime_test_file("t.tar.lzma", "application/x-tar", "lzma")
# file(1) cannot uncompress .lzma files
#self.mime_test_file("t.tar.lzma.foo", "application/x-tar", "lzma")
self.mime_test_file("t.txt.xz", "application/x-xz")
self.mime_test_file("t.txt.xz.foo", "application/x-xz")
@ -105,9 +104,8 @@ class TestMime (unittest.TestCase):
#self.mime_test_file("t.alz.foo", "application/x-alzip")
self.mime_test_file("t.arc", "application/x-arc")
self.mime_test_file("t.arc.foo", "application/x-arc")
# file(1) does not recognize .lrz files
#self.mime_test_file("t.txt.lrz", "application/x-lrzip")
#self.mime_test_file("t.txt.lrz.foo", "application/x-lrzip")
self.mime_test_file("t.txt.lrz", "application/x-lrzip")
self.mime_test_file("t.txt.lrz.foo", "application/x-lrzip")
self.mime_test_file("t.txt.rz", "application/x-rzip")
self.mime_test_file("t.txt.rz.foo", "application/x-rzip")
self.mime_test_file("t.zoo", "application/x-zoo")