Refactor mime detection functions and add better tests.

This commit is contained in:
Bastian Kleineidam 2010-03-06 19:20:50 +01:00
parent 6cba779475
commit 960d15cd76
2 changed files with 134 additions and 80 deletions

View File

@ -90,24 +90,13 @@ def run (cmd, **kwargs):
@memoized
def guess_mime (filename):
"""Guess the MIME type of given filename using three methods:
(a) using file(1) --mime
(b) using file(1) and look the result string
(c) looking at the filename extension with the Python mimetypes module
Of course only (c) will be eventually successful if the system does not
have the file(1) program installed or the given file is not readable.
The encoding is determined by method (c).
"""Guess the MIME type of given filename using file(1) and if that
fails by looking at the filename extension with the Python mimetypes
module.
The result of this function is cached.
"""
mime, encoding = None, None
if os.path.isfile(filename):
file_prog = find_program("file")
if file_prog:
mime, encoding = guess_mime_file_mime(file_prog, filename)
if mime is None:
mime = guess_mime_file(file_prog, filename)
mime, encoding = guess_mime_file(filename)
if mime is None:
mime, encoding = guess_mime_mimedb(filename)
assert mime is not None or encoding is None
@ -137,6 +126,21 @@ def guess_mime_mimedb (filename):
return mime, encoding
def guess_mime_file (filename):
    """Determine MIME type and encoding of filename with file(1).

    Two strategies are tried in order:
    (a) file(1) with the --mime option
    (b) plain file(1), looking at the result string
    Returns a (mime, encoding) tuple. Both entries are None when
    filename is not an existing regular file or when find_program()
    yields no file(1) program.
    """
    if not os.path.isfile(filename):
        return None, None
    file_prog = find_program("file")
    if not file_prog:
        return None, None
    mime, encoding = guess_mime_file_mime(file_prog, filename)
    if mime is None:
        # --mime gave no MIME type; fall back to the textual description.
        # The encoding reported by the --mime attempt is kept as is.
        mime = guess_mime_file_text(file_prog, filename)
    return mime, encoding
def guess_mime_file_mime (file_prog, filename):
"""Determine MIME type of filename with file(1) and --mime option."""
mime, encoding = None, None
@ -195,7 +199,7 @@ FileText2Mime = {
"current ar archive": "application/x-archive",
}
def guess_mime_file (file_prog, filename):
def guess_mime_file_text (file_prog, filename):
"""Determine MIME type of filename with file(1)."""
cmd = [file_prog, "--brief", filename]
try:

View File

@ -21,72 +21,122 @@ from tests import needs_program, datadir
class TestMime (unittest.TestCase):
def mime_test (self, filename, mime, encoding):
"""Test that file has given mime and encoding."""
def mime_test (self, func, filename, mime, encoding):
"""Test that file has given mime and encoding as determined by
given function."""
archive = os.path.join(datadir, filename)
res = patoolib.util.guess_mime(archive)
fail_msg = "MIME type for archive `%s' should be (%s, %s), but was %s" % (filename, mime, encoding, res)
self.assertEqual(res, (mime, encoding), fail_msg)
file_mime, file_encoding = func(archive)
fail_msg = "MIME type for archive `%s' should be (%s, %s), but was (%s, %s)" % (filename, mime, encoding, file_mime, file_encoding)
self.assertEqual((file_mime, file_encoding), (mime, encoding), fail_msg)
def mime_test_file (self, filename, mime, encoding):
    """Check that patoolib.util.guess_mime_file, the file(1) based
    detection, reports the given mime and encoding for filename."""
    detector = patoolib.util.guess_mime_file
    self.mime_test(detector, filename, mime, encoding)
def mime_test_mimedb (self, filename, mime, encoding):
    """Check that patoolib.util.guess_mime_mimedb, the mimetypes module
    based detection, reports the given mime and encoding for filename."""
    detector = patoolib.util.guess_mime_mimedb
    self.mime_test(detector, filename, mime, encoding)
@needs_program('file')
def test_mime (self):
self.mime_test("t.7z", "application/x-7z-compressed", None)
self.mime_test("t.7z.foo", "application/x-7z-compressed", None)
self.mime_test("t.arj", "application/x-arj", None)
self.mime_test("t.arj.foo", "application/x-arj", None)
self.mime_test("t.bz2", "application/x-bzip2", None)
self.mime_test("t.bz2.foo", "application/x-bzip2", None)
self.mime_test("t.cab", "application/vnd.ms-cab-compressed", None)
self.mime_test("t.cab.foo", "application/vnd.ms-cab-compressed", None)
self.mime_test("t.cpio", "application/x-cpio", None)
self.mime_test("t.cpio.foo", "application/x-cpio", None)
self.mime_test("t.deb", "application/x-debian-package", None)
self.mime_test("t.deb.foo", "application/x-debian-package", None)
self.mime_test("t.gz", "application/x-gzip", None)
self.mime_test("t.gz.foo", "application/x-gzip", None)
self.mime_test("t.jar", "application/zip", None)
self.mime_test("t.jar.foo", "application/zip", None)
self.mime_test("t.lzma", "application/x-lzma", None)
def test_mime_file (self):
self.mime_test_file("t.7z", "application/x-7z-compressed", None)
self.mime_test_file("t.7z.foo", "application/x-7z-compressed", None)
self.mime_test_file("t.arj", "application/x-arj", None)
self.mime_test_file("t.arj.foo", "application/x-arj", None)
self.mime_test_file("t.bz2", "application/x-bzip2", None)
self.mime_test_file("t.bz2.foo", "application/x-bzip2", None)
self.mime_test_file("t.cab", "application/vnd.ms-cab-compressed", None)
self.mime_test_file("t.cab.foo", "application/vnd.ms-cab-compressed", None)
self.mime_test_file("t.cpio", "application/x-cpio", None)
self.mime_test_file("t.cpio.foo", "application/x-cpio", None)
self.mime_test_file("t.deb", "application/x-debian-package", None)
self.mime_test_file("t.deb.foo", "application/x-debian-package", None)
self.mime_test_file("t.gz", "application/x-gzip", None)
self.mime_test_file("t.gz.foo", "application/x-gzip", None)
self.mime_test_file("t.jar", "application/zip", None)
self.mime_test_file("t.jar.foo", "application/zip", None)
# file(1) does not recognize .lzma files
#self.mime_test("t.lzma.foo", "application/x-lzma", None)
self.mime_test("t.txt.lz", "application/x-lzip", None)
self.mime_test("t.txt.lz.foo", "application/x-lzip", None)
self.mime_test("t.lzo", "application/x-lzop", None)
self.mime_test("t.lzo.foo", "application/x-lzop", None)
self.mime_test("t.rar", "application/x-rar", None)
self.mime_test("t.rar.foo", "application/x-rar", None)
self.mime_test("t.rpm", "application/x-rpm", None)
self.mime_test("t.rpm.foo", "application/x-rpm", None)
self.mime_test("t.tar", "application/x-tar", None)
self.mime_test("t.tar.foo", "application/x-tar", None)
self.mime_test("t.tar.bz2", "application/x-tar", "bzip2")
self.mime_test("t.tar.bz2.foo", "application/x-tar", "bzip2")
self.mime_test("t.tar.gz", "application/x-tar", "gzip")
self.mime_test("t.tar.gz.foo", "application/x-tar", "gzip")
self.mime_test("t.tar.lzma", "application/x-tar", "lzma")
#self.mime_test_file("t.lzma", "application/x-lzma", None)
#self.mime_test_file("t.lzma.foo", "application/x-lzma", None)
self.mime_test_file("t.txt.lz", "application/x-lzip", None)
self.mime_test_file("t.txt.lz.foo", "application/x-lzip", None)
self.mime_test_file("t.lzo", "application/x-lzop", None)
self.mime_test_file("t.lzo.foo", "application/x-lzop", None)
self.mime_test_file("t.rar", "application/x-rar", None)
self.mime_test_file("t.rar.foo", "application/x-rar", None)
self.mime_test_file("t.rpm", "application/x-rpm", None)
self.mime_test_file("t.rpm.foo", "application/x-rpm", None)
self.mime_test_file("t.tar", "application/x-tar", None)
self.mime_test_file("t.tar.foo", "application/x-tar", None)
self.mime_test_file("t.tar.bz2", "application/x-tar", "bzip2")
self.mime_test_file("t.tar.bz2.foo", "application/x-tar", "bzip2")
self.mime_test_file("t.tar.gz", "application/x-tar", "gzip")
self.mime_test_file("t.tar.gz.foo", "application/x-tar", "gzip")
# file(1) does not recognize .lzma files
#self.mime_test("t.tar.lzma.foo", "application/x-tar", "lzma")
self.mime_test("t.tar.xz", "application/x-tar", "xz")
self.mime_test("t.tar.xz.foo", "application/x-tar", "xz")
self.mime_test("t.tar.lz", "application/x-tar", "lzip")
self.mime_test("t.tar.lz.foo", "application/x-tar", "lzip")
self.mime_test("t.tar.Z", "application/x-tar", "compress")
self.mime_test("t.tar.Z.foo", "application/x-tar", "compress")
self.mime_test("t.taz", "application/x-tar", "compress")
self.mime_test("t.taz.foo", "application/x-tar", "compress")
self.mime_test("t.tbz2", "application/x-tar", "bzip2")
self.mime_test("t.tbz2.foo", "application/x-tar", "bzip2")
self.mime_test("t.tgz", "application/x-tar", "gzip")
self.mime_test("t.tgz.foo", "application/x-tar", "gzip")
self.mime_test("t.txt.gz", "application/x-gzip", None)
self.mime_test("t.txt.gz.foo", "application/x-gzip", None)
self.mime_test("t.xz", "application/x-xz", None)
self.mime_test("t.xz.foo", "application/x-xz", None)
self.mime_test("t.Z", "application/x-compress", None)
self.mime_test("t.Z.foo", "application/x-compress", None)
self.mime_test("t.zip", "application/zip", None)
self.mime_test("t.zip.foo", "application/zip", None)
self.mime_test("t.ace", "application/x-ace", None)
self.mime_test("t.ace.foo", "application/x-ace", None)
self.mime_test("t.a", "application/x-archive", None)
self.mime_test("t.a.foo", "application/x-archive", None)
#self.mime_test_file("t.tar.lzma", "application/x-tar", "lzma")
#self.mime_test_file("t.tar.lzma.foo", "application/x-tar", "lzma")
self.mime_test_file("t.tar.xz", "application/x-tar", "xz")
self.mime_test_file("t.tar.xz.foo", "application/x-tar", "xz")
self.mime_test_file("t.tar.lz", "application/x-tar", "lzip")
self.mime_test_file("t.tar.lz.foo", "application/x-tar", "lzip")
self.mime_test_file("t.tar.Z", "application/x-tar", "compress")
self.mime_test_file("t.tar.Z.foo", "application/x-tar", "compress")
self.mime_test_file("t.taz", "application/x-tar", "gzip")
self.mime_test_file("t.taz.foo", "application/x-tar", "gzip")
self.mime_test_file("t.tbz2", "application/x-tar", "bzip2")
self.mime_test_file("t.tbz2.foo", "application/x-tar", "bzip2")
self.mime_test_file("t.tgz", "application/x-tar", "gzip")
self.mime_test_file("t.tgz.foo", "application/x-tar", "gzip")
self.mime_test_file("t.txt.gz", "application/x-gzip", None)
self.mime_test_file("t.txt.gz.foo", "application/x-gzip", None)
self.mime_test_file("t.xz", "application/x-xz", None)
self.mime_test_file("t.xz.foo", "application/x-xz", None)
self.mime_test_file("t.Z", "application/x-compress", None)
self.mime_test_file("t.Z.foo", "application/x-compress", None)
self.mime_test_file("t.jar", "application/zip", None)
self.mime_test_file("t.jar.foo", "application/zip", None)
self.mime_test_file("t.zip", "application/zip", None)
self.mime_test_file("t.zip.foo", "application/zip", None)
self.mime_test_file("t.ace", "application/x-ace", None)
self.mime_test_file("t.ace.foo", "application/x-ace", None)
self.mime_test_file("t.a", "application/x-archive", None)
self.mime_test_file("t.a.foo", "application/x-archive", None)
self.mime_test_file("t.lha", "application/x-lha", None)
self.mime_test_file("t.lzh", "application/x-lha", None)
self.mime_test_file("t.lha.foo", "application/x-lha", None)
def test_mime_mimedb (self):
    """Run mime_test_mimedb() over the sample archive names, expecting
    the listed MIME type and encoding for each of them."""
    # (filename, expected mime, expected encoding); checked in this order,
    # so the first mismatch aborts the test just like a flat call list.
    expectations = (
        ("t.7z", "application/x-7z-compressed", None),
        ("t.arj", "application/x-arj", None),
        ("t.bz2", "application/x-bzip2", None),
        ("t.cab", "application/x-cab", None),
        ("t.cpio", "application/x-cpio", None),
        ("t.deb", "application/x-debian-package", None),
        ("t.gz", "application/x-gzip", None),
        ("t.jar", "application/java-archive", None),
        ("t.lzma", "application/x-lzma", None),
        ("t.txt.lz", "application/x-lzip", None),
        ("t.lzo", "application/x-lzop", None),
        ("t.rar", "application/rar", None),
        ("t.rpm", "application/x-redhat-package-manager", None),
        ("t.tar", "application/x-tar", None),
        ("t.tar.bz2", "application/x-tar", "bzip2"),
        ("t.tar.gz", "application/x-tar", "gzip"),
        ("t.tar.lzma", "application/x-tar", "lzma"),
        ("t.tar.xz", "application/x-tar", "xz"),
        ("t.tar.lz", "application/x-tar", "lzip"),
        ("t.tar.Z", "application/x-tar", "compress"),
        ("t.taz", "application/x-tar", "gzip"),
        ("t.tbz2", "application/x-tar", "bzip2"),
        ("t.tgz", "application/x-tar", "gzip"),
        ("t.txt.gz", "application/x-gzip", None),
        ("t.xz", "application/x-xz", None),
        ("t.Z", "application/x-compress", None),
        ("t.zip", "application/zip", None),
        ("t.ace", "application/x-ace", None),
        ("t.a", "application/x-archive", None),
        ("t.lha", "application/x-lha", None),
        ("t.lzh", "application/x-lzh", None),
    )
    for filename, mime, encoding in expectations:
        self.mime_test_mimedb(filename, mime, encoding)