Refactor mime detection functions and add better tests.

This commit is contained in:
Bastian Kleineidam 2010-03-06 19:20:50 +01:00
parent 6cba779475
commit 960d15cd76
2 changed files with 134 additions and 80 deletions

View File

@ -90,24 +90,13 @@ def run (cmd, **kwargs):
@memoized @memoized
def guess_mime (filename): def guess_mime (filename):
"""Guess the MIME type of given filename using three methods: """Guess the MIME type of given filename using file(1) and if that
(a) using file(1) --mime fails by looking at the filename extension with the Python mimetypes
(b) using file(1) and look the result string module.
(c) looking at the filename extension with the Python mimetypes module
Of course only (c) will be eventually successful if the system does not
have the file(1) program installed or the given file is not readable.
The encoding is determined by method (c).
The result of this function is cached. The result of this function is cached.
""" """
mime, encoding = None, None mime, encoding = guess_mime_file(filename)
if os.path.isfile(filename):
file_prog = find_program("file")
if file_prog:
mime, encoding = guess_mime_file_mime(file_prog, filename)
if mime is None:
mime = guess_mime_file(file_prog, filename)
if mime is None: if mime is None:
mime, encoding = guess_mime_mimedb(filename) mime, encoding = guess_mime_mimedb(filename)
assert mime is not None or encoding is None assert mime is not None or encoding is None
@ -137,6 +126,21 @@ def guess_mime_mimedb (filename):
return mime, encoding return mime, encoding
def guess_mime_file (filename):
    """Determine the MIME type and encoding of filename with file(1).

    Two strategies are tried in order:
    (a) file(1) with the --mime option
    (b) plain file(1), by looking at the result string

    Returns a (mime, encoding) tuple. Both entries are None when filename
    is not a regular file or when no file(1) program could be found.
    """
    if not os.path.isfile(filename):
        return None, None
    file_prog = find_program("file")
    if not file_prog:
        return None, None
    mime, encoding = guess_mime_file_mime(file_prog, filename)
    if mime is None:
        # The --mime lookup gave no MIME type, so fall back to the
        # textual description printed by file(1).
        mime = guess_mime_file_text(file_prog, filename)
    return mime, encoding
def guess_mime_file_mime (file_prog, filename): def guess_mime_file_mime (file_prog, filename):
"""Determine MIME type of filename with file(1) and --mime option.""" """Determine MIME type of filename with file(1) and --mime option."""
mime, encoding = None, None mime, encoding = None, None
@ -195,7 +199,7 @@ FileText2Mime = {
"current ar archive": "application/x-archive", "current ar archive": "application/x-archive",
} }
def guess_mime_file (file_prog, filename): def guess_mime_file_text (file_prog, filename):
"""Determine MIME type of filename with file(1).""" """Determine MIME type of filename with file(1)."""
cmd = [file_prog, "--brief", filename] cmd = [file_prog, "--brief", filename]
try: try:

View File

@ -21,72 +21,122 @@ from tests import needs_program, datadir
class TestMime (unittest.TestCase): class TestMime (unittest.TestCase):
def mime_test (self, filename, mime, encoding): def mime_test (self, func, filename, mime, encoding):
"""Test that file has given mime and encoding.""" """Test that file has given mime and encoding as determined by
given function."""
archive = os.path.join(datadir, filename) archive = os.path.join(datadir, filename)
res = patoolib.util.guess_mime(archive) file_mime, file_encoding = func(archive)
fail_msg = "MIME type for archive `%s' should be (%s, %s), but was %s" % (filename, mime, encoding, res) fail_msg = "MIME type for archive `%s' should be (%s, %s), but was (%s, %s)" % (filename, mime, encoding, file_mime, file_encoding)
self.assertEqual(res, (mime, encoding), fail_msg) self.assertEqual((file_mime, file_encoding), (mime, encoding), fail_msg)
def mime_test_file (self, filename, mime, encoding):
    """Check that the file(1) based detection reports the given mime and
    encoding for filename."""
    detect = patoolib.util.guess_mime_file
    self.mime_test(detect, filename, mime, encoding)
def mime_test_mimedb (self, filename, mime, encoding):
    """Check that the mimetypes module based detection reports the given
    mime and encoding for filename."""
    detect = patoolib.util.guess_mime_mimedb
    self.mime_test(detect, filename, mime, encoding)
@needs_program('file') @needs_program('file')
def test_mime (self): def test_mime_file (self):
self.mime_test("t.7z", "application/x-7z-compressed", None) self.mime_test_file("t.7z", "application/x-7z-compressed", None)
self.mime_test("t.7z.foo", "application/x-7z-compressed", None) self.mime_test_file("t.7z.foo", "application/x-7z-compressed", None)
self.mime_test("t.arj", "application/x-arj", None) self.mime_test_file("t.arj", "application/x-arj", None)
self.mime_test("t.arj.foo", "application/x-arj", None) self.mime_test_file("t.arj.foo", "application/x-arj", None)
self.mime_test("t.bz2", "application/x-bzip2", None) self.mime_test_file("t.bz2", "application/x-bzip2", None)
self.mime_test("t.bz2.foo", "application/x-bzip2", None) self.mime_test_file("t.bz2.foo", "application/x-bzip2", None)
self.mime_test("t.cab", "application/vnd.ms-cab-compressed", None) self.mime_test_file("t.cab", "application/vnd.ms-cab-compressed", None)
self.mime_test("t.cab.foo", "application/vnd.ms-cab-compressed", None) self.mime_test_file("t.cab.foo", "application/vnd.ms-cab-compressed", None)
self.mime_test("t.cpio", "application/x-cpio", None) self.mime_test_file("t.cpio", "application/x-cpio", None)
self.mime_test("t.cpio.foo", "application/x-cpio", None) self.mime_test_file("t.cpio.foo", "application/x-cpio", None)
self.mime_test("t.deb", "application/x-debian-package", None) self.mime_test_file("t.deb", "application/x-debian-package", None)
self.mime_test("t.deb.foo", "application/x-debian-package", None) self.mime_test_file("t.deb.foo", "application/x-debian-package", None)
self.mime_test("t.gz", "application/x-gzip", None) self.mime_test_file("t.gz", "application/x-gzip", None)
self.mime_test("t.gz.foo", "application/x-gzip", None) self.mime_test_file("t.gz.foo", "application/x-gzip", None)
self.mime_test("t.jar", "application/zip", None) self.mime_test_file("t.jar", "application/zip", None)
self.mime_test("t.jar.foo", "application/zip", None) self.mime_test_file("t.jar.foo", "application/zip", None)
self.mime_test("t.lzma", "application/x-lzma", None)
# file(1) does not recognize .lzma files # file(1) does not recognize .lzma files
#self.mime_test("t.lzma.foo", "application/x-lzma", None) #self.mime_test_file("t.lzma", "application/x-lzma", None)
self.mime_test("t.txt.lz", "application/x-lzip", None) #self.mime_test_file("t.lzma.foo", "application/x-lzma", None)
self.mime_test("t.txt.lz.foo", "application/x-lzip", None) self.mime_test_file("t.txt.lz", "application/x-lzip", None)
self.mime_test("t.lzo", "application/x-lzop", None) self.mime_test_file("t.txt.lz.foo", "application/x-lzip", None)
self.mime_test("t.lzo.foo", "application/x-lzop", None) self.mime_test_file("t.lzo", "application/x-lzop", None)
self.mime_test("t.rar", "application/x-rar", None) self.mime_test_file("t.lzo.foo", "application/x-lzop", None)
self.mime_test("t.rar.foo", "application/x-rar", None) self.mime_test_file("t.rar", "application/x-rar", None)
self.mime_test("t.rpm", "application/x-rpm", None) self.mime_test_file("t.rar.foo", "application/x-rar", None)
self.mime_test("t.rpm.foo", "application/x-rpm", None) self.mime_test_file("t.rpm", "application/x-rpm", None)
self.mime_test("t.tar", "application/x-tar", None) self.mime_test_file("t.rpm.foo", "application/x-rpm", None)
self.mime_test("t.tar.foo", "application/x-tar", None) self.mime_test_file("t.tar", "application/x-tar", None)
self.mime_test("t.tar.bz2", "application/x-tar", "bzip2") self.mime_test_file("t.tar.foo", "application/x-tar", None)
self.mime_test("t.tar.bz2.foo", "application/x-tar", "bzip2") self.mime_test_file("t.tar.bz2", "application/x-tar", "bzip2")
self.mime_test("t.tar.gz", "application/x-tar", "gzip") self.mime_test_file("t.tar.bz2.foo", "application/x-tar", "bzip2")
self.mime_test("t.tar.gz.foo", "application/x-tar", "gzip") self.mime_test_file("t.tar.gz", "application/x-tar", "gzip")
self.mime_test("t.tar.lzma", "application/x-tar", "lzma") self.mime_test_file("t.tar.gz.foo", "application/x-tar", "gzip")
# file(1) does not recognize .lzma files # file(1) does not recognize .lzma files
#self.mime_test("t.tar.lzma.foo", "application/x-tar", "lzma") #self.mime_test_file("t.tar.lzma", "application/x-tar", "lzma")
self.mime_test("t.tar.xz", "application/x-tar", "xz") #self.mime_test_file("t.tar.lzma.foo", "application/x-tar", "lzma")
self.mime_test("t.tar.xz.foo", "application/x-tar", "xz") self.mime_test_file("t.tar.xz", "application/x-tar", "xz")
self.mime_test("t.tar.lz", "application/x-tar", "lzip") self.mime_test_file("t.tar.xz.foo", "application/x-tar", "xz")
self.mime_test("t.tar.lz.foo", "application/x-tar", "lzip") self.mime_test_file("t.tar.lz", "application/x-tar", "lzip")
self.mime_test("t.tar.Z", "application/x-tar", "compress") self.mime_test_file("t.tar.lz.foo", "application/x-tar", "lzip")
self.mime_test("t.tar.Z.foo", "application/x-tar", "compress") self.mime_test_file("t.tar.Z", "application/x-tar", "compress")
self.mime_test("t.taz", "application/x-tar", "compress") self.mime_test_file("t.tar.Z.foo", "application/x-tar", "compress")
self.mime_test("t.taz.foo", "application/x-tar", "compress") self.mime_test_file("t.taz", "application/x-tar", "gzip")
self.mime_test("t.tbz2", "application/x-tar", "bzip2") self.mime_test_file("t.taz.foo", "application/x-tar", "gzip")
self.mime_test("t.tbz2.foo", "application/x-tar", "bzip2") self.mime_test_file("t.tbz2", "application/x-tar", "bzip2")
self.mime_test("t.tgz", "application/x-tar", "gzip") self.mime_test_file("t.tbz2.foo", "application/x-tar", "bzip2")
self.mime_test("t.tgz.foo", "application/x-tar", "gzip") self.mime_test_file("t.tgz", "application/x-tar", "gzip")
self.mime_test("t.txt.gz", "application/x-gzip", None) self.mime_test_file("t.tgz.foo", "application/x-tar", "gzip")
self.mime_test("t.txt.gz.foo", "application/x-gzip", None) self.mime_test_file("t.txt.gz", "application/x-gzip", None)
self.mime_test("t.xz", "application/x-xz", None) self.mime_test_file("t.txt.gz.foo", "application/x-gzip", None)
self.mime_test("t.xz.foo", "application/x-xz", None) self.mime_test_file("t.xz", "application/x-xz", None)
self.mime_test("t.Z", "application/x-compress", None) self.mime_test_file("t.xz.foo", "application/x-xz", None)
self.mime_test("t.Z.foo", "application/x-compress", None) self.mime_test_file("t.Z", "application/x-compress", None)
self.mime_test("t.zip", "application/zip", None) self.mime_test_file("t.Z.foo", "application/x-compress", None)
self.mime_test("t.zip.foo", "application/zip", None) self.mime_test_file("t.jar", "application/zip", None)
self.mime_test("t.ace", "application/x-ace", None) self.mime_test_file("t.jar.foo", "application/zip", None)
self.mime_test("t.ace.foo", "application/x-ace", None) self.mime_test_file("t.zip", "application/zip", None)
self.mime_test("t.a", "application/x-archive", None) self.mime_test_file("t.zip.foo", "application/zip", None)
self.mime_test("t.a.foo", "application/x-archive", None) self.mime_test_file("t.ace", "application/x-ace", None)
self.mime_test_file("t.ace.foo", "application/x-ace", None)
self.mime_test_file("t.a", "application/x-archive", None)
self.mime_test_file("t.a.foo", "application/x-archive", None)
self.mime_test_file("t.lha", "application/x-lha", None)
self.mime_test_file("t.lzh", "application/x-lha", None)
self.mime_test_file("t.lha.foo", "application/x-lha", None)
def test_mime_mimedb (self):
    """Check the MIME type and encoding that the mimetypes module based
    detection reports for each test archive name."""
    # (filename, expected MIME type, expected encoding), checked in order.
    expected = (
        ("t.7z", "application/x-7z-compressed", None),
        ("t.arj", "application/x-arj", None),
        ("t.bz2", "application/x-bzip2", None),
        ("t.cab", "application/x-cab", None),
        ("t.cpio", "application/x-cpio", None),
        ("t.deb", "application/x-debian-package", None),
        ("t.gz", "application/x-gzip", None),
        ("t.jar", "application/java-archive", None),
        ("t.lzma", "application/x-lzma", None),
        ("t.txt.lz", "application/x-lzip", None),
        ("t.lzo", "application/x-lzop", None),
        ("t.rar", "application/rar", None),
        ("t.rpm", "application/x-redhat-package-manager", None),
        ("t.tar", "application/x-tar", None),
        ("t.tar.bz2", "application/x-tar", "bzip2"),
        ("t.tar.gz", "application/x-tar", "gzip"),
        ("t.tar.lzma", "application/x-tar", "lzma"),
        ("t.tar.xz", "application/x-tar", "xz"),
        ("t.tar.lz", "application/x-tar", "lzip"),
        ("t.tar.Z", "application/x-tar", "compress"),
        ("t.taz", "application/x-tar", "gzip"),
        ("t.tbz2", "application/x-tar", "bzip2"),
        ("t.tgz", "application/x-tar", "gzip"),
        ("t.txt.gz", "application/x-gzip", None),
        ("t.xz", "application/x-xz", None),
        ("t.Z", "application/x-compress", None),
        ("t.zip", "application/zip", None),
        ("t.ace", "application/x-ace", None),
        ("t.a", "application/x-archive", None),
        ("t.lha", "application/x-lha", None),
        ("t.lzh", "application/x-lzh", None),
    )
    for filename, mime, encoding in expected:
        self.mime_test_mimedb(filename, mime, encoding)