Using .NET, how can you find the MIME type of a file based on the file signature, not the extension?

2018-12-31 02:50发布

I am looking for a simple way to get a mime type where the file extension is incorrect or not given, something similar to this question only in .Net.

18条回答
素衣白纱
2楼-- · 2018-12-31 02:57

I came across the same issue and eventually opted for my own flavour of Kirk Baucom's solution, found here.

It seems to me that this is an opportunity for someone to write an online look-up service.

Anyway, Hope it helps.

查看更多
只若初见
3楼-- · 2018-12-31 03:01

This class uses the previous answers to try three different ways, in order: a hard-coded table keyed by file extension, the FindMimeFromData API, and the registry.

using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.InteropServices;

using Microsoft.Win32;

namespace YourNamespace
{
    /// <summary>
    /// Resolves the MIME type of a file by trying three strategies in order:
    /// a built-in table keyed by file extension, content sniffing through
    /// urlmon.dll's <c>FindMimeFromData</c>, and the "Content Type" value stored
    /// for the extension under HKEY_CLASSES_ROOT.
    /// </summary>
    public static class MimeTypeParser
    {
        /// <summary>Maximum number of leading bytes handed to the sniffer.</summary>
        private const int SniffBufferSize = 256;

        // The out parameter receives a native string pointer, so it must be
        // pointer-sized (IntPtr); a 32-bit integer truncates it in 64-bit
        // processes. The string parameters are wide strings (LPWStr).
        [DllImport("urlmon.dll", CharSet = CharSet.Unicode, ExactSpelling = true)]
        private static extern int FindMimeFromData(
                IntPtr pBC,
                [MarshalAs(UnmanagedType.LPWStr)] string pwzUrl,
                [MarshalAs(UnmanagedType.LPArray)] byte[] pBuffer,
                int cbSize,
                [MarshalAs(UnmanagedType.LPWStr)] string pwzMimeProposed,
                int dwMimeFlags,
                out IntPtr ppwzMimeOut,
                int dwReserved
        );

        /// <summary>
        /// Returns the MIME type for <paramref name="sFilePath"/>, consulting the
        /// extension table first, then the file content, then the registry.
        /// </summary>
        /// <param name="sFilePath">Path of the file to classify.</param>
        /// <returns>The first MIME type found, or null when every strategy fails.</returns>
        public static string GetMimeType(string sFilePath)
        {
            string mimeType = GetMimeTypeFromList(sFilePath);

            if (String.IsNullOrEmpty(mimeType))
            {
                mimeType = GetMimeTypeFromFile(sFilePath);
            }

            if (String.IsNullOrEmpty(mimeType))
            {
                mimeType = GetMimeTypeFromRegistry(sFilePath);
            }

            return mimeType;
        }

        /// <summary>
        /// Looks up the extension of <paramref name="sFileNameOrPath"/> in the built-in table.
        /// </summary>
        /// <param name="sFileNameOrPath">File name or path; may be null or lack an extension.</param>
        /// <returns>The mapped MIME type, or null when there is no extension or no mapping.</returns>
        public static string GetMimeTypeFromList(string sFileNameOrPath)
        {
            if (String.IsNullOrEmpty(sFileNameOrPath))
            {
                return null;
            }

            // Path.GetExtension returns "" for names without an extension, so the
            // leading dot may only be stripped after checking the length.
            string extension = Path.GetExtension(sFileNameOrPath);
            if (String.IsNullOrEmpty(extension) || extension.Length < 2)
            {
                return null;
            }

            string mimeType;
            return spDicMIMETypes.TryGetValue(extension.Substring(1), out mimeType) ? mimeType : null;
        }

        /// <summary>
        /// Reads the "Content Type" value stored under HKEY_CLASSES_ROOT for the
        /// extension of <paramref name="sFileNameOrPath"/>.
        /// </summary>
        /// <param name="sFileNameOrPath">File name or path; may be null or lack an extension.</param>
        /// <returns>The registered content type, or null when none is available.</returns>
        public static string GetMimeTypeFromRegistry(string sFileNameOrPath)
        {
            if (String.IsNullOrEmpty(sFileNameOrPath))
            {
                return null;
            }

            string extension = Path.GetExtension(sFileNameOrPath);
            if (String.IsNullOrEmpty(extension))
            {
                // An empty sub-key name would address the root key rather than an extension key.
                return null;
            }

            using (RegistryKey key = Registry.ClassesRoot.OpenSubKey(extension.ToLowerInvariant()))
            {
                if (key == null)
                {
                    return null;
                }

                object contentType = key.GetValue("Content Type");
                return contentType == null ? null : contentType.ToString();
            }
        }

        /// <summary>
        /// Sniffs up to the first 256 bytes of the file with <c>FindMimeFromData</c>.
        /// </summary>
        /// <param name="sFilePath">Path of the file to inspect.</param>
        /// <returns>
        /// The detected MIME type, or null when the file does not exist, is empty,
        /// the native call fails or is unavailable, or the result is one of the
        /// generic fallbacks "text/plain" / "application/octet-stream".
        /// </returns>
        public static string GetMimeTypeFromFile(string sFilePath)
        {
            if (!File.Exists(sFilePath))
            {
                return null;
            }

            byte[] buffer = new byte[SniffBufferSize];
            int bytesRead = 0;

            // Open read-only so read-only files work; loop because Stream.Read
            // may return fewer bytes than requested.
            using (FileStream stream = new FileStream(sFilePath, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                int chunk;
                while (bytesRead < buffer.Length
                       && (chunk = stream.Read(buffer, bytesRead, buffer.Length - bytesRead)) > 0)
                {
                    bytesRead += chunk;
                }
            }

            if (bytesRead == 0)
            {
                return null;
            }

            IntPtr mimePointer = IntPtr.Zero;
            try
            {
                // Pass the number of bytes actually read so zero padding is not
                // treated as file content.
                int hresult = FindMimeFromData(IntPtr.Zero, null, buffer, bytesRead, null, 0, out mimePointer, 0);
                if (hresult != 0 || mimePointer == IntPtr.Zero)
                {
                    return null;
                }

                string sniffed = Marshal.PtrToStringUni(mimePointer);

                // The generic fallbacks carry no information; report "unknown" so
                // callers can try another strategy.
                if (String.IsNullOrEmpty(sniffed)
                    || sniffed == "text/plain"
                    || sniffed == "application/octet-stream")
                {
                    return null;
                }

                return sniffed;
            }
            catch (DllNotFoundException)
            {
                // urlmon.dll is not available on this platform.
                return null;
            }
            catch (EntryPointNotFoundException)
            {
                return null;
            }
            finally
            {
                if (mimePointer != IntPtr.Zero)
                {
                    Marshal.FreeCoTaskMem(mimePointer);
                }
            }
        }

        // Extension (without dot) -> MIME type. Keys compare case-insensitively
        // and culture-independently.
        private static readonly Dictionary<string, string> spDicMIMETypes = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        {
            {"ai", "application/postscript"},
            {"aif", "audio/x-aiff"},
            {"aifc", "audio/x-aiff"},
            {"aiff", "audio/x-aiff"},
            {"asc", "text/plain"},
            {"atom", "application/atom+xml"},
            {"au", "audio/basic"},
            {"avi", "video/x-msvideo"},
            {"bcpio", "application/x-bcpio"},
            {"bin", "application/octet-stream"},
            {"bmp", "image/bmp"},
            {"cdf", "application/x-netcdf"},
            {"cgm", "image/cgm"},
            {"class", "application/octet-stream"},
            {"cpio", "application/x-cpio"},
            {"cpt", "application/mac-compactpro"},
            {"csh", "application/x-csh"},
            {"css", "text/css"},
            {"dcr", "application/x-director"},
            {"dif", "video/x-dv"},
            {"dir", "application/x-director"},
            {"djv", "image/vnd.djvu"},
            {"djvu", "image/vnd.djvu"},
            {"dll", "application/octet-stream"},
            {"dmg", "application/octet-stream"},
            {"dms", "application/octet-stream"},
            {"doc", "application/msword"},
            {"docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"},
            {"dotx", "application/vnd.openxmlformats-officedocument.wordprocessingml.template"},
            {"docm", "application/vnd.ms-word.document.macroEnabled.12"},
            {"dotm", "application/vnd.ms-word.template.macroEnabled.12"},
            {"dtd", "application/xml-dtd"},
            {"dv", "video/x-dv"},
            {"dvi", "application/x-dvi"},
            {"dxr", "application/x-director"},
            {"eps", "application/postscript"},
            {"etx", "text/x-setext"},
            {"exe", "application/octet-stream"},
            {"ez", "application/andrew-inset"},
            {"gif", "image/gif"},
            {"gram", "application/srgs"},
            {"grxml", "application/srgs+xml"},
            {"gtar", "application/x-gtar"},
            {"hdf", "application/x-hdf"},
            {"hqx", "application/mac-binhex40"},
            {"htc", "text/x-component"},
            {"htm", "text/html"},
            {"html", "text/html"},
            {"ice", "x-conference/x-cooltalk"},
            {"ico", "image/x-icon"},
            {"ics", "text/calendar"},
            {"ief", "image/ief"},
            {"ifb", "text/calendar"},
            {"iges", "model/iges"},
            {"igs", "model/iges"},
            {"jnlp", "application/x-java-jnlp-file"},
            {"jp2", "image/jp2"},
            {"jpe", "image/jpeg"},
            {"jpeg", "image/jpeg"},
            {"jpg", "image/jpeg"},
            {"js", "application/x-javascript"},
            {"kar", "audio/midi"},
            {"latex", "application/x-latex"},
            {"lha", "application/octet-stream"},
            {"lzh", "application/octet-stream"},
            {"m3u", "audio/x-mpegurl"},
            {"m4a", "audio/mp4a-latm"},
            {"m4b", "audio/mp4a-latm"},
            {"m4p", "audio/mp4a-latm"},
            {"m4u", "video/vnd.mpegurl"},
            {"m4v", "video/x-m4v"},
            {"mac", "image/x-macpaint"},
            {"man", "application/x-troff-man"},
            {"mathml", "application/mathml+xml"},
            {"me", "application/x-troff-me"},
            {"mesh", "model/mesh"},
            {"mid", "audio/midi"},
            {"midi", "audio/midi"},
            {"mif", "application/vnd.mif"},
            {"mov", "video/quicktime"},
            {"movie", "video/x-sgi-movie"},
            {"mp2", "audio/mpeg"},
            {"mp3", "audio/mpeg"},
            {"mp4", "video/mp4"},
            {"mpe", "video/mpeg"},
            {"mpeg", "video/mpeg"},
            {"mpg", "video/mpeg"},
            {"mpga", "audio/mpeg"},
            {"ms", "application/x-troff-ms"},
            {"msh", "model/mesh"},
            {"mxu", "video/vnd.mpegurl"},
            {"nc", "application/x-netcdf"},
            {"oda", "application/oda"},
            {"ogg", "application/ogg"},
            {"pbm", "image/x-portable-bitmap"},
            {"pct", "image/pict"},
            {"pdb", "chemical/x-pdb"},
            {"pdf", "application/pdf"},
            {"pgm", "image/x-portable-graymap"},
            {"pgn", "application/x-chess-pgn"},
            {"pic", "image/pict"},
            {"pict", "image/pict"},
            {"png", "image/png"},
            {"pnm", "image/x-portable-anymap"},
            {"pnt", "image/x-macpaint"},
            {"pntg", "image/x-macpaint"},
            {"ppm", "image/x-portable-pixmap"},
            {"ppt", "application/vnd.ms-powerpoint"},
            {"pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation"},
            {"potx", "application/vnd.openxmlformats-officedocument.presentationml.template"},
            {"ppsx", "application/vnd.openxmlformats-officedocument.presentationml.slideshow"},
            {"ppam", "application/vnd.ms-powerpoint.addin.macroEnabled.12"},
            {"pptm", "application/vnd.ms-powerpoint.presentation.macroEnabled.12"},
            {"potm", "application/vnd.ms-powerpoint.template.macroEnabled.12"},
            {"ppsm", "application/vnd.ms-powerpoint.slideshow.macroEnabled.12"},
            {"ps", "application/postscript"},
            {"qt", "video/quicktime"},
            {"qti", "image/x-quicktime"},
            {"qtif", "image/x-quicktime"},
            {"ra", "audio/x-pn-realaudio"},
            {"ram", "audio/x-pn-realaudio"},
            {"ras", "image/x-cmu-raster"},
            {"rdf", "application/rdf+xml"},
            {"rgb", "image/x-rgb"},
            {"rm", "application/vnd.rn-realmedia"},
            {"roff", "application/x-troff"},
            {"rtf", "text/rtf"},
            {"rtx", "text/richtext"},
            {"sgm", "text/sgml"},
            {"sgml", "text/sgml"},
            {"sh", "application/x-sh"},
            {"shar", "application/x-shar"},
            {"silo", "model/mesh"},
            {"sit", "application/x-stuffit"},
            {"skd", "application/x-koan"},
            {"skm", "application/x-koan"},
            {"skp", "application/x-koan"},
            {"skt", "application/x-koan"},
            {"smi", "application/smil"},
            {"smil", "application/smil"},
            {"snd", "audio/basic"},
            {"so", "application/octet-stream"},
            {"spl", "application/x-futuresplash"},
            {"src", "application/x-wais-source"},
            {"sv4cpio", "application/x-sv4cpio"},
            {"sv4crc", "application/x-sv4crc"},
            {"svg", "image/svg+xml"},
            {"swf", "application/x-shockwave-flash"},
            {"t", "application/x-troff"},
            {"tar", "application/x-tar"},
            {"tcl", "application/x-tcl"},
            {"tex", "application/x-tex"},
            {"texi", "application/x-texinfo"},
            {"texinfo", "application/x-texinfo"},
            {"tif", "image/tiff"},
            {"tiff", "image/tiff"},
            {"tr", "application/x-troff"},
            {"tsv", "text/tab-separated-values"},
            {"txt", "text/plain"},
            {"ustar", "application/x-ustar"},
            {"vcd", "application/x-cdlink"},
            {"vrml", "model/vrml"},
            {"vxml", "application/voicexml+xml"},
            {"wav", "audio/x-wav"},
            {"wbmp", "image/vnd.wap.wbmp"},
            {"wbxml", "application/vnd.wap.wbxml"},
            // Misspelled key kept so existing lookups keep working.
            {"wbmxl", "application/vnd.wap.wbxml"},
            {"wml", "text/vnd.wap.wml"},
            {"wmlc", "application/vnd.wap.wmlc"},
            {"wmls", "text/vnd.wap.wmlscript"},
            {"wmlsc", "application/vnd.wap.wmlscriptc"},
            {"wrl", "model/vrml"},
            {"xbm", "image/x-xbitmap"},
            {"xht", "application/xhtml+xml"},
            {"xhtml", "application/xhtml+xml"},
            {"xls", "application/vnd.ms-excel"},
            {"xml", "application/xml"},
            {"xpm", "image/x-xpixmap"},
            {"xsl", "application/xml"},
            {"xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"},
            {"xltx", "application/vnd.openxmlformats-officedocument.spreadsheetml.template"},
            {"xlsm", "application/vnd.ms-excel.sheet.macroEnabled.12"},
            {"xltm", "application/vnd.ms-excel.template.macroEnabled.12"},
            {"xlam", "application/vnd.ms-excel.addin.macroEnabled.12"},
            {"xlsb", "application/vnd.ms-excel.sheet.binary.macroEnabled.12"},
            {"xslt", "application/xslt+xml"},
            {"xul", "application/vnd.mozilla.xul+xml"},
            {"xwd", "image/x-xwindowdump"},
            {"xyz", "chemical/x-xyz"},
            {"zip", "application/zip"}
        };
    }
}
查看更多
笑指拈花
4楼-- · 2018-12-31 03:02

I think the right answer is a combination of Steve Morgan's and Serguei's answers. That's how Internet Explorer does it. The P/Invoke call to FindMimeFromData works for only 26 hard-coded MIME types. Also, it will give ambiguous MIME types (such as text/plain or application/octet-stream) even though there may exist a more specific, more appropriate MIME type. If it fails to give a good MIME type, you can go to the registry for a more specific one. The server registry could have more up-to-date MIME types.

Refer to: http://msdn.microsoft.com/en-us/library/ms775147(VS.85).aspx

查看更多
妖精总统
5楼-- · 2018-12-31 03:03

I did use urlmon.dll in the end. I thought there would be an easier way but this works. I include the code to help anyone else and allow me to find it again if I need it.

using System.Runtime.InteropServices;

...

    // Native signature: the out parameter receives a pointer to a wide string,
    // so it must be pointer-sized (IntPtr) — a 32-bit integer truncates it in
    // 64-bit processes. String parameters are wide strings (LPWStr).
    [DllImport("urlmon.dll", CharSet = CharSet.Unicode, ExactSpelling = true)]
    private static extern int FindMimeFromData(
        IntPtr pBC,
        [MarshalAs(UnmanagedType.LPWStr)] string pwzUrl,
        [MarshalAs(UnmanagedType.LPArray)] byte[] pBuffer,
        int cbSize,
        [MarshalAs(UnmanagedType.LPWStr)] string pwzMimeProposed,
        int dwMimeFlags,
        out IntPtr ppwzMimeOut,
        int dwReserved
    );

    /// <summary>
    /// Detects the MIME type of a file by passing up to its first 256 bytes to
    /// <c>FindMimeFromData</c>.
    /// </summary>
    /// <param name="filename">Path of the file to inspect.</param>
    /// <returns>
    /// The detected MIME type, or "unknown/unknown" when the file is empty or
    /// the native call fails or is unavailable.
    /// </returns>
    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
    public static string getMimeFromFile(string filename)
    {
        const string unknownMimeType = "unknown/unknown";
        const int sniffSize = 256;

        if (!File.Exists(filename))
        {
            throw new FileNotFoundException(filename + " not found");
        }

        byte[] buffer = new byte[sniffSize];
        int bytesRead = 0;

        // Read-only access lets read-only files be inspected; loop because
        // Stream.Read may return fewer bytes than requested.
        using (FileStream fs = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            int chunk;
            while (bytesRead < buffer.Length
                   && (chunk = fs.Read(buffer, bytesRead, buffer.Length - bytesRead)) > 0)
            {
                bytesRead += chunk;
            }
        }

        if (bytesRead == 0)
        {
            // Nothing to sniff.
            return unknownMimeType;
        }

        IntPtr mimeTypePtr = IntPtr.Zero;
        try
        {
            // Pass only the bytes actually read so zero padding is not treated
            // as file content.
            int hresult = FindMimeFromData(IntPtr.Zero, null, buffer, bytesRead, null, 0, out mimeTypePtr, 0);
            if (hresult != 0 || mimeTypePtr == IntPtr.Zero)
            {
                return unknownMimeType;
            }

            string mime = Marshal.PtrToStringUni(mimeTypePtr);
            return String.IsNullOrEmpty(mime) ? unknownMimeType : mime;
        }
        catch (DllNotFoundException)
        {
            // urlmon.dll is not available on this platform.
            return unknownMimeType;
        }
        catch (EntryPointNotFoundException)
        {
            return unknownMimeType;
        }
        finally
        {
            if (mimeTypePtr != IntPtr.Zero)
            {
                Marshal.FreeCoTaskMem(mimeTypePtr);
            }
        }
    }
查看更多
深知你不懂我心
6楼-- · 2018-12-31 03:03

@Steve Morgan and @Richard Gourlay this is a great solution, thank you for that. One small drawback is that when the number of bytes in a file is 255 or below, the mime type will sometimes yield "application/octet-stream", which is slightly inaccurate for files which would be expected to yield "text/plain". I have updated your original method to account for this situation as follows:

If the number of bytes in the file is less than or equal to 255 and the deduced mime type is "application/octet-stream", then create a new byte array that consists of the original file bytes repeated n-times until the total number of bytes is >= 256. Then re-check the mime-type on that new byte array.

Modified method:

Imports System.Runtime.InteropServices

' Native signature: ppwzMimeOut receives a pointer to a wide string, so it must
' be pointer-sized (IntPtr) - a 32-bit integer truncates it in 64-bit processes.
' String parameters are wide strings (LPWStr).
<DllImport("urlmon.dll", CharSet:=CharSet.Unicode, ExactSpelling:=True)> _
Private Shared Function FindMimeFromData(pBC As IntPtr, <MarshalAs(UnmanagedType.LPWStr)> pwzUrl As String, <MarshalAs(UnmanagedType.LPArray)> pBuffer As Byte(), cbSize As Integer, <MarshalAs(UnmanagedType.LPWStr)> pwzMimeProposed As String, dwMimeFlags As Integer, _
ByRef ppwzMimeOut As IntPtr, dwReserverd As Integer) As Integer
End Function

''' <summary>
''' Detects the MIME type of a file from its first 256 bytes using FindMimeFromData.
''' When a file shorter than 256 bytes is reported as "application/octet-stream",
''' its content is repeated to fill 256 bytes and sniffed a second time.
''' </summary>
''' <param name="f">The file to inspect.</param>
''' <returns>
''' The detected MIME type; "" when <paramref name="f"/> is Nothing or does not exist;
''' "unknown/unknown" when reading or sniffing fails.
''' </returns>
Private Function GetMimeType(ByVal f As FileInfo) As String
    'See http://stackoverflow.com/questions/58510/using-net-how-can-you-find-the-mime-type-of-a-file-based-on-the-file-signature
    Const sniffSize As Integer = 256
    Const ambiguousMimeType As String = "application/octet-stream"
    Const unknownMimeType As String = "unknown/unknown"

    If f Is Nothing OrElse Not f.Exists Then
        Return ""
    End If

    ' Sniffs one buffer; returns Nothing when the native call yields no result.
    ' The native string is always released, even if marshaling throws.
    Dim sniff As Func(Of Byte(), String) =
        Function(data As Byte()) As String
            Dim mimePtr As IntPtr = IntPtr.Zero
            Try
                Dim hresult As Integer = FindMimeFromData(IntPtr.Zero, Nothing, data, data.Length, Nothing, 0, mimePtr, 0)
                If hresult <> 0 OrElse mimePtr = IntPtr.Zero Then
                    Return Nothing
                End If
                Return Marshal.PtrToStringUni(mimePtr)
            Finally
                If mimePtr <> IntPtr.Zero Then
                    Marshal.FreeCoTaskMem(mimePtr)
                End If
            End Try
        End Function

    Try
        Dim buffer As Byte() = New Byte(sniffSize - 1) {}
        Dim bytesRead As Integer = 0

        ' Loop because Stream.Read may return fewer bytes than requested.
        Using stream As New FileStream(f.FullName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)
            While bytesRead < sniffSize
                Dim chunk As Integer = stream.Read(buffer, bytesRead, sniffSize - bytesRead)
                If chunk <= 0 Then
                    Exit While
                End If
                bytesRead += chunk
            End While
        End Using

        Dim mimeType As String = sniff(buffer)
        If mimeType Is Nothing Then
            Return unknownMimeType
        End If

        If bytesRead > 0 AndAlso bytesRead < sniffSize AndAlso mimeType.Equals(ambiguousMimeType, StringComparison.OrdinalIgnoreCase) Then
            ' Repeat the file content until the buffer is full to get a more
            ' deterministic MIME type analysis than a zero-padded buffer gives.
            Dim repeated As Byte() = New Byte(sniffSize - 1) {}
            For i As Integer = 0 To sniffSize - 1
                repeated(i) = buffer(i Mod bytesRead)
            Next

            mimeType = sniff(repeated)
            If mimeType Is Nothing Then
                Return unknownMimeType
            End If
        End If

        Return mimeType
    Catch ex As Exception
        ' Best effort: any I/O or interop failure is reported as unknown.
        Return unknownMimeType
    End Try
End Function
查看更多
泪湿衣
7楼-- · 2018-12-31 03:03

I ended up using Winista MimeDetector from Netomatix. The sources can be downloaded for free after you created an account: http://www.netomatix.com/Products/DocumentManagement/MimeDetector.aspx

// Build the detector from its XML definition file.
// NOTE(review): presumably "mime-types.xml" is the definitions file that ships with Winista — confirm its location.
MimeTypes g_MimeTypes = new MimeTypes("mime-types.xml");

// File.ReadAllBytes opens the file read-only and returns its complete content;
// a single Stream.Read call may return fewer bytes than requested.
byte[] data = System.IO.File.ReadAllBytes(strFile);

// Winista's API takes signed bytes, so convert before detection.
sbyte[] fileData = Winista.Mime.SupportUtil.ToSByteArray(data);

MimeType oMimeType = g_MimeTypes.GetMimeType(fileData);

This is part of another question, answered here: "Alternative to FindMimeFromData method in Urlmon.dll one which has more MIME types". In my opinion, it is the best solution to this problem.

查看更多
登录 后发表回答