Improve MIME detection of LZMA and LRZIP.
This commit is contained in:
parent
1a9344baec
commit
d2b12f985e
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
* Add patoolib.__version__ (see PEP 396).
|
* Add patoolib.__version__ (see PEP 396).
|
||||||
Closes: GH bug #3
|
Closes: GH bug #3
|
||||||
|
* Improved detection of LZMA and LRZIP files with file(1).
|
||||||
|
|
||||||
|
|
||||||
1.2 (released 27.6.2013)
|
1.2 (released 27.6.2013)
|
||||||
|
|
|
@ -21,7 +21,6 @@ import os
|
||||||
import shutil
|
import shutil
|
||||||
import stat
|
import stat
|
||||||
import importlib
|
import importlib
|
||||||
from . import util
|
|
||||||
# PEP 396
|
# PEP 396
|
||||||
from .configuration import Version as __version__
|
from .configuration import Version as __version__
|
||||||
__all__ = ['list_formats', 'list_archive', 'extract_archive', 'test_archive',
|
__all__ = ['list_formats', 'list_archive', 'extract_archive', 'test_archive',
|
||||||
|
@ -272,6 +271,8 @@ ProgramModules = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
from . import util
|
||||||
|
|
||||||
def get_archive_format (filename):
|
def get_archive_format (filename):
|
||||||
"""Detect filename archive format and optional compression."""
|
"""Detect filename archive format and optional compression."""
|
||||||
mime, compression = util.guess_mime(filename)
|
mime, compression = util.guess_mime(filename)
|
||||||
|
|
|
@ -23,7 +23,7 @@ import mimetypes
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
from . import configuration
|
from . import configuration, ArchiveMimetypes, ArchiveCompressions
|
||||||
try:
|
try:
|
||||||
from shutil import which
|
from shutil import which
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
@ -245,7 +245,6 @@ def guess_mime_mimedb (filename):
|
||||||
mime, encoding = None, None
|
mime, encoding = None, None
|
||||||
if mimedb is not None:
|
if mimedb is not None:
|
||||||
mime, encoding = mimedb.guess_type(filename, strict=False)
|
mime, encoding = mimedb.guess_type(filename, strict=False)
|
||||||
from . import ArchiveMimetypes, ArchiveCompressions
|
|
||||||
if mime not in ArchiveMimetypes and encoding in ArchiveCompressions:
|
if mime not in ArchiveMimetypes and encoding in ArchiveCompressions:
|
||||||
# Files like 't.txt.gz' are recognized with encoding as format, and
|
# Files like 't.txt.gz' are recognized with encoding as format, and
|
||||||
# an unsupported mime-type like 'text/plain'. Fix this.
|
# an unsupported mime-type like 'text/plain'. Fix this.
|
||||||
|
@ -262,7 +261,7 @@ def guess_mime_file (filename):
|
||||||
"""
|
"""
|
||||||
mime, encoding = None, None
|
mime, encoding = None, None
|
||||||
base, ext = os.path.splitext(filename)
|
base, ext = os.path.splitext(filename)
|
||||||
if ext.lower() in ('.lzma', '.alz', '.lrz'):
|
if ext.lower() in ('.alz',):
|
||||||
# let mimedb recognize these extensions
|
# let mimedb recognize these extensions
|
||||||
return mime, encoding
|
return mime, encoding
|
||||||
if os.path.isfile(filename):
|
if os.path.isfile(filename):
|
||||||
|
@ -271,6 +270,27 @@ def guess_mime_file (filename):
|
||||||
mime, encoding = guess_mime_file_mime(file_prog, filename)
|
mime, encoding = guess_mime_file_mime(file_prog, filename)
|
||||||
if mime is None:
|
if mime is None:
|
||||||
mime = guess_mime_file_text(file_prog, filename)
|
mime = guess_mime_file_text(file_prog, filename)
|
||||||
|
encoding = None
|
||||||
|
if mime in Mime2Encoding:
|
||||||
|
# try to look inside compressed archives
|
||||||
|
cmd = [file_prog, "--brief", "--mime", "--uncompress", filename]
|
||||||
|
try:
|
||||||
|
outparts = backtick(cmd).strip().split(";")
|
||||||
|
except OSError:
|
||||||
|
# ignore errors, as file(1) is only a fallback
|
||||||
|
return mime, encoding
|
||||||
|
mime2 = outparts[0].split(" ", 1)[0]
|
||||||
|
if mime2 in ('application/x-empty', 'application/octet-stream'):
|
||||||
|
# The uncompressor program file(1) uses is not installed
|
||||||
|
# or is not able to uncompress.
|
||||||
|
# Try to get mime information from the file extension.
|
||||||
|
mime2, encoding2 = guess_mime_mimedb(filename)
|
||||||
|
if mime2 in ArchiveMimetypes:
|
||||||
|
mime = mime2
|
||||||
|
encoding = encoding2
|
||||||
|
elif mime2 in ArchiveMimetypes:
|
||||||
|
mime = mime2
|
||||||
|
encoding = get_file_mime_encoding(outparts)
|
||||||
return mime, encoding
|
return mime, encoding
|
||||||
|
|
||||||
|
|
||||||
|
@ -284,27 +304,7 @@ def guess_mime_file_mime (file_prog, filename):
|
||||||
mime = backtick(cmd).strip()
|
mime = backtick(cmd).strip()
|
||||||
except OSError:
|
except OSError:
|
||||||
# ignore errors, as file(1) is only a fallback
|
# ignore errors, as file(1) is only a fallback
|
||||||
return mime, encoding
|
pass
|
||||||
from . import ArchiveMimetypes
|
|
||||||
if mime in Mime2Encoding:
|
|
||||||
# try to look inside compressed archives
|
|
||||||
cmd = [file_prog, "--brief", "--mime", "--uncompress", filename]
|
|
||||||
try:
|
|
||||||
outparts = backtick(cmd).strip().split(";")
|
|
||||||
except OSError:
|
|
||||||
# ignore errors, as file(1) is only a fallback
|
|
||||||
return mime, encoding
|
|
||||||
mime2 = outparts[0].split(" ", 1)[0]
|
|
||||||
if mime2 == 'application/x-empty':
|
|
||||||
# The uncompressor program file(1) uses is not installed.
|
|
||||||
# Try to get mime information from the file extension.
|
|
||||||
mime2, encoding2 = guess_mime_mimedb(filename)
|
|
||||||
if mime2 in ArchiveMimetypes:
|
|
||||||
mime = mime2
|
|
||||||
encoding = encoding2
|
|
||||||
elif mime2 in ArchiveMimetypes:
|
|
||||||
mime = mime2
|
|
||||||
encoding = get_file_mime_encoding(outparts)
|
|
||||||
if mime not in ArchiveMimetypes:
|
if mime not in ArchiveMimetypes:
|
||||||
mime, encoding = None, None
|
mime, encoding = None, None
|
||||||
return mime, encoding
|
return mime, encoding
|
||||||
|
@ -331,6 +331,8 @@ FileText2Mime = {
|
||||||
"ASCII cpio archive": "application/x-cpio",
|
"ASCII cpio archive": "application/x-cpio",
|
||||||
"Debian binary package": "application/x-debian-package",
|
"Debian binary package": "application/x-debian-package",
|
||||||
"gzip compressed data": "application/x-gzip",
|
"gzip compressed data": "application/x-gzip",
|
||||||
|
"LZMA compressed data": "application/x-lzma",
|
||||||
|
"LRZIP compressed data": "application/x-lrzip",
|
||||||
"lzop compressed data": "application/x-lzop",
|
"lzop compressed data": "application/x-lzop",
|
||||||
"Microsoft Cabinet archive data": "application/vnd.ms-cab-compressed",
|
"Microsoft Cabinet archive data": "application/vnd.ms-cab-compressed",
|
||||||
"RAR archive data": "application/x-rar",
|
"RAR archive data": "application/x-rar",
|
||||||
|
@ -361,7 +363,7 @@ def guess_mime_file_text (file_prog, filename):
|
||||||
return None
|
return None
|
||||||
# match output against known strings
|
# match output against known strings
|
||||||
for matcher, mime in FileText2Mime.items():
|
for matcher, mime in FileText2Mime.items():
|
||||||
if output.startswith(matcher):
|
if output.startswith(matcher) and mime in ArchiveMimetypes:
|
||||||
return mime
|
return mime
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
|
@ -61,9 +61,8 @@ class TestMime (unittest.TestCase):
|
||||||
self.mime_test_file("t.txt.gz.foo", ("application/gzip", "application/x-gzip"))
|
self.mime_test_file("t.txt.gz.foo", ("application/gzip", "application/x-gzip"))
|
||||||
self.mime_test_file("t.jar", "application/zip")
|
self.mime_test_file("t.jar", "application/zip")
|
||||||
self.mime_test_file("t.jar.foo", "application/zip")
|
self.mime_test_file("t.jar.foo", "application/zip")
|
||||||
# file(1) does not recognize .lzma files
|
self.mime_test_file("t.txt.lzma", "application/x-lzma")
|
||||||
#self.mime_test_file("t.lzma", "application/x-lzma")
|
self.mime_test_file("t.txt.lzma.foo", "application/x-lzma")
|
||||||
#self.mime_test_file("t.lzma.foo", "application/x-lzma")
|
|
||||||
self.mime_test_file("t.txt.lz", "application/x-lzip")
|
self.mime_test_file("t.txt.lz", "application/x-lzip")
|
||||||
self.mime_test_file("t.txt.lz.foo", "application/x-lzip")
|
self.mime_test_file("t.txt.lz.foo", "application/x-lzip")
|
||||||
self.mime_test_file("t.txt.lzo", "application/x-lzop")
|
self.mime_test_file("t.txt.lzo", "application/x-lzop")
|
||||||
|
@ -82,8 +81,8 @@ class TestMime (unittest.TestCase):
|
||||||
self.mime_test_file("t.tgz", "application/x-tar", "gzip")
|
self.mime_test_file("t.tgz", "application/x-tar", "gzip")
|
||||||
self.mime_test_file("t.tar.xz", "application/x-tar", "xz")
|
self.mime_test_file("t.tar.xz", "application/x-tar", "xz")
|
||||||
self.mime_test_file("t.tar.Z", "application/x-tar", "compress")
|
self.mime_test_file("t.tar.Z", "application/x-tar", "compress")
|
||||||
# file(1) does not recognize .lzma files
|
self.mime_test_file("t.tar.lzma", "application/x-tar", "lzma")
|
||||||
#self.mime_test_file("t.tar.lzma", "application/x-tar", "lzma")
|
# file(1) cannot uncompress .lzma files
|
||||||
#self.mime_test_file("t.tar.lzma.foo", "application/x-tar", "lzma")
|
#self.mime_test_file("t.tar.lzma.foo", "application/x-tar", "lzma")
|
||||||
self.mime_test_file("t.txt.xz", "application/x-xz")
|
self.mime_test_file("t.txt.xz", "application/x-xz")
|
||||||
self.mime_test_file("t.txt.xz.foo", "application/x-xz")
|
self.mime_test_file("t.txt.xz.foo", "application/x-xz")
|
||||||
|
@ -105,9 +104,8 @@ class TestMime (unittest.TestCase):
|
||||||
#self.mime_test_file("t.alz.foo", "application/x-alzip")
|
#self.mime_test_file("t.alz.foo", "application/x-alzip")
|
||||||
self.mime_test_file("t.arc", "application/x-arc")
|
self.mime_test_file("t.arc", "application/x-arc")
|
||||||
self.mime_test_file("t.arc.foo", "application/x-arc")
|
self.mime_test_file("t.arc.foo", "application/x-arc")
|
||||||
# file(1) does not recognize .lrz files
|
self.mime_test_file("t.txt.lrz", "application/x-lrzip")
|
||||||
#self.mime_test_file("t.txt.lrz", "application/x-lrzip")
|
self.mime_test_file("t.txt.lrz.foo", "application/x-lrzip")
|
||||||
#self.mime_test_file("t.txt.lrz.foo", "application/x-lrzip")
|
|
||||||
self.mime_test_file("t.txt.rz", "application/x-rzip")
|
self.mime_test_file("t.txt.rz", "application/x-rzip")
|
||||||
self.mime_test_file("t.txt.rz.foo", "application/x-rzip")
|
self.mime_test_file("t.txt.rz.foo", "application/x-rzip")
|
||||||
self.mime_test_file("t.zoo", "application/x-zoo")
|
self.mime_test_file("t.zoo", "application/x-zoo")
|
||||||
|
|
Loading…
Reference in New Issue