Improve MIME detection of LZMA and LRZIP.
This commit is contained in:
parent
1a9344baec
commit
d2b12f985e
|
@ -2,6 +2,7 @@
|
|||
|
||||
* Add patoolib.__version__ (see PEP 396).
|
||||
Closes: GH bug #3
|
||||
* Improved detection of LZMA and LRZIP files with file(1).
|
||||
|
||||
|
||||
1.2 (released 27.6.2013)
|
||||
|
|
|
@ -21,7 +21,6 @@ import os
|
|||
import shutil
|
||||
import stat
|
||||
import importlib
|
||||
from . import util
|
||||
# PEP 396
|
||||
from .configuration import Version as __version__
|
||||
__all__ = ['list_formats', 'list_archive', 'extract_archive', 'test_archive',
|
||||
|
@ -272,6 +271,8 @@ ProgramModules = {
|
|||
}
|
||||
|
||||
|
||||
from . import util
|
||||
|
||||
def get_archive_format (filename):
|
||||
"""Detect filename archive format and optional compression."""
|
||||
mime, compression = util.guess_mime(filename)
|
||||
|
|
|
@ -23,7 +23,7 @@ import mimetypes
|
|||
import tempfile
|
||||
import time
|
||||
import traceback
|
||||
from . import configuration
|
||||
from . import configuration, ArchiveMimetypes, ArchiveCompressions
|
||||
try:
|
||||
from shutil import which
|
||||
except ImportError:
|
||||
|
@ -245,7 +245,6 @@ def guess_mime_mimedb (filename):
|
|||
mime, encoding = None, None
|
||||
if mimedb is not None:
|
||||
mime, encoding = mimedb.guess_type(filename, strict=False)
|
||||
from . import ArchiveMimetypes, ArchiveCompressions
|
||||
if mime not in ArchiveMimetypes and encoding in ArchiveCompressions:
|
||||
# Files like 't.txt.gz' are recognized with encoding as format, and
|
||||
# an unsupported mime-type like 'text/plain'. Fix this.
|
||||
|
@ -262,7 +261,7 @@ def guess_mime_file (filename):
|
|||
"""
|
||||
mime, encoding = None, None
|
||||
base, ext = os.path.splitext(filename)
|
||||
if ext.lower() in ('.lzma', '.alz', '.lrz'):
|
||||
if ext.lower() in ('.alz',):
|
||||
# let mimedb recognize these extensions
|
||||
return mime, encoding
|
||||
if os.path.isfile(filename):
|
||||
|
@ -271,6 +270,27 @@ def guess_mime_file (filename):
|
|||
mime, encoding = guess_mime_file_mime(file_prog, filename)
|
||||
if mime is None:
|
||||
mime = guess_mime_file_text(file_prog, filename)
|
||||
encoding = None
|
||||
if mime in Mime2Encoding:
|
||||
# try to look inside compressed archives
|
||||
cmd = [file_prog, "--brief", "--mime", "--uncompress", filename]
|
||||
try:
|
||||
outparts = backtick(cmd).strip().split(";")
|
||||
except OSError:
|
||||
# ignore errors, as file(1) is only a fallback
|
||||
return mime, encoding
|
||||
mime2 = outparts[0].split(" ", 1)[0]
|
||||
if mime2 in ('application/x-empty', 'application/octet-stream'):
|
||||
# The uncompressor program file(1) uses is not installed
|
||||
# or is not able to uncompress.
|
||||
# Try to get mime information from the file extension.
|
||||
mime2, encoding2 = guess_mime_mimedb(filename)
|
||||
if mime2 in ArchiveMimetypes:
|
||||
mime = mime2
|
||||
encoding = encoding2
|
||||
elif mime2 in ArchiveMimetypes:
|
||||
mime = mime2
|
||||
encoding = get_file_mime_encoding(outparts)
|
||||
return mime, encoding
|
||||
|
||||
|
||||
|
@ -284,27 +304,7 @@ def guess_mime_file_mime (file_prog, filename):
|
|||
mime = backtick(cmd).strip()
|
||||
except OSError:
|
||||
# ignore errors, as file(1) is only a fallback
|
||||
return mime, encoding
|
||||
from . import ArchiveMimetypes
|
||||
if mime in Mime2Encoding:
|
||||
# try to look inside compressed archives
|
||||
cmd = [file_prog, "--brief", "--mime", "--uncompress", filename]
|
||||
try:
|
||||
outparts = backtick(cmd).strip().split(";")
|
||||
except OSError:
|
||||
# ignore errors, as file(1) is only a fallback
|
||||
return mime, encoding
|
||||
mime2 = outparts[0].split(" ", 1)[0]
|
||||
if mime2 == 'application/x-empty':
|
||||
# The uncompressor program file(1) uses is not installed.
|
||||
# Try to get mime information from the file extension.
|
||||
mime2, encoding2 = guess_mime_mimedb(filename)
|
||||
if mime2 in ArchiveMimetypes:
|
||||
mime = mime2
|
||||
encoding = encoding2
|
||||
elif mime2 in ArchiveMimetypes:
|
||||
mime = mime2
|
||||
encoding = get_file_mime_encoding(outparts)
|
||||
pass
|
||||
if mime not in ArchiveMimetypes:
|
||||
mime, encoding = None, None
|
||||
return mime, encoding
|
||||
|
@ -331,6 +331,8 @@ FileText2Mime = {
|
|||
"ASCII cpio archive": "application/x-cpio",
|
||||
"Debian binary package": "application/x-debian-package",
|
||||
"gzip compressed data": "application/x-gzip",
|
||||
"LZMA compressed data": "application/x-lzma",
|
||||
"LRZIP compressed data": "application/x-lrzip",
|
||||
"lzop compressed data": "application/x-lzop",
|
||||
"Microsoft Cabinet archive data": "application/vnd.ms-cab-compressed",
|
||||
"RAR archive data": "application/x-rar",
|
||||
|
@ -361,7 +363,7 @@ def guess_mime_file_text (file_prog, filename):
|
|||
return None
|
||||
# match output against known strings
|
||||
for matcher, mime in FileText2Mime.items():
|
||||
if output.startswith(matcher):
|
||||
if output.startswith(matcher) and mime in ArchiveMimetypes:
|
||||
return mime
|
||||
return None
|
||||
|
||||
|
|
|
@ -61,9 +61,8 @@ class TestMime (unittest.TestCase):
|
|||
self.mime_test_file("t.txt.gz.foo", ("application/gzip", "application/x-gzip"))
|
||||
self.mime_test_file("t.jar", "application/zip")
|
||||
self.mime_test_file("t.jar.foo", "application/zip")
|
||||
# file(1) does not recognize .lzma files
|
||||
#self.mime_test_file("t.lzma", "application/x-lzma")
|
||||
#self.mime_test_file("t.lzma.foo", "application/x-lzma")
|
||||
self.mime_test_file("t.txt.lzma", "application/x-lzma")
|
||||
self.mime_test_file("t.txt.lzma.foo", "application/x-lzma")
|
||||
self.mime_test_file("t.txt.lz", "application/x-lzip")
|
||||
self.mime_test_file("t.txt.lz.foo", "application/x-lzip")
|
||||
self.mime_test_file("t.txt.lzo", "application/x-lzop")
|
||||
|
@ -82,8 +81,8 @@ class TestMime (unittest.TestCase):
|
|||
self.mime_test_file("t.tgz", "application/x-tar", "gzip")
|
||||
self.mime_test_file("t.tar.xz", "application/x-tar", "xz")
|
||||
self.mime_test_file("t.tar.Z", "application/x-tar", "compress")
|
||||
# file(1) does not recognize .lzma files
|
||||
#self.mime_test_file("t.tar.lzma", "application/x-tar", "lzma")
|
||||
self.mime_test_file("t.tar.lzma", "application/x-tar", "lzma")
|
||||
# file(1) cannot uncompress .lzma files
|
||||
#self.mime_test_file("t.tar.lzma.foo", "application/x-tar", "lzma")
|
||||
self.mime_test_file("t.txt.xz", "application/x-xz")
|
||||
self.mime_test_file("t.txt.xz.foo", "application/x-xz")
|
||||
|
@ -105,9 +104,8 @@ class TestMime (unittest.TestCase):
|
|||
#self.mime_test_file("t.alz.foo", "application/x-alzip")
|
||||
self.mime_test_file("t.arc", "application/x-arc")
|
||||
self.mime_test_file("t.arc.foo", "application/x-arc")
|
||||
# file(1) does not recognize .lrz files
|
||||
#self.mime_test_file("t.txt.lrz", "application/x-lrzip")
|
||||
#self.mime_test_file("t.txt.lrz.foo", "application/x-lrzip")
|
||||
self.mime_test_file("t.txt.lrz", "application/x-lrzip")
|
||||
self.mime_test_file("t.txt.lrz.foo", "application/x-lrzip")
|
||||
self.mime_test_file("t.txt.rz", "application/x-rzip")
|
||||
self.mime_test_file("t.txt.rz.foo", "application/x-rzip")
|
||||
self.mime_test_file("t.zoo", "application/x-zoo")
|
||||
|
|
Loading…
Reference in New Issue