关于c#:使用.NET,如何根据文件签名而不是扩展名找到文件的mime类型

关于c#:使用.NET,如何根据文件签名而不是扩展名找到文件的mime类型

Using .NET, how can you find the mime type of a file based on the file signature not the extension

我正在寻找一种简单的方法,在文件扩展名不正确或缺失的情况下获取文件的MIME类型,类似于这个问题,只不过是针对.NET的。


最后我确实使用了urlmon.dll。我以为会有一个更简单的方法,但这可行。我提供了可帮助其他人的代码,并在需要时允许我再次找到它。

1
using System.Runtime.InteropServices;

...

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
    [DllImport(@"urlmon.dll", CharSet = CharSet.Auto)]
    private extern static System.UInt32 FindMimeFromData(
        System.UInt32 pBC,
        [MarshalAs(UnmanagedType.LPStr)] System.String pwzUrl,
        [MarshalAs(UnmanagedType.LPArray)] byte[] pBuffer,
        System.UInt32 cbSize,
        [MarshalAs(UnmanagedType.LPStr)] System.String pwzMimeProposed,
        System.UInt32 dwMimeFlags,
        out System.UInt32 ppwzMimeOut,
        System.UInt32 dwReserverd
    );

    public static string getMimeFromFile(string filename)
    {
        if (!File.Exists(filename))
            throw new FileNotFoundException(filename +" not found");

        byte[] buffer = new byte[256];
        using (FileStream fs = new FileStream(filename, FileMode.Open))
        {
            if (fs.Length >= 256)
                fs.Read(buffer, 0, 256);
            else
                fs.Read(buffer, 0, (int)fs.Length);
        }
        try
        {
            System.UInt32 mimetype;
            FindMimeFromData(0, null, buffer, 256, null, 0, out mimetype, 0);
            System.IntPtr mimeTypePtr = new IntPtr(mimetype);
            string mime = Marshal.PtrToStringUni(mimeTypePtr);
            Marshal.FreeCoTaskMem(mimeTypePtr);
            return mime;
        }
        catch (Exception e)
        {
            return"unknown/unknown";
        }
    }

我找到了一个硬编码的解决方案,希望我能对某人有所帮助:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
/// <summary>
/// Maps file-name extensions to MIME types using a hard-coded table.
/// The lookup is based on the extension only; file contents are never read.
/// </summary>
public static class MIMEAssistant
{
  // Keys are lower-case extensions without the leading dot.
  private static readonly Dictionary<string, string> MIMETypesDictionary = new Dictionary<string, string>
  {
    {"ai","application/postscript"},
    {"aif","audio/x-aiff"},
    {"aifc","audio/x-aiff"},
    {"aiff","audio/x-aiff"},
    {"asc","text/plain"},
    {"atom","application/atom+xml"},
    {"au","audio/basic"},
    {"avi","video/x-msvideo"},
    {"bcpio","application/x-bcpio"},
    {"bin","application/octet-stream"},
    {"bmp","image/bmp"},
    {"cdf","application/x-netcdf"},
    {"cgm","image/cgm"},
    {"class","application/octet-stream"},
    {"cpio","application/x-cpio"},
    {"cpt","application/mac-compactpro"},
    {"csh","application/x-csh"},
    {"css","text/css"},
    {"dcr","application/x-director"},
    {"dif","video/x-dv"},
    {"dir","application/x-director"},
    {"djv","image/vnd.djvu"},
    {"djvu","image/vnd.djvu"},
    {"dll","application/octet-stream"},
    {"dmg","application/octet-stream"},
    {"dms","application/octet-stream"},
    {"doc","application/msword"},
    {"docx","application/vnd.openxmlformats-officedocument.wordprocessingml.document"},
    {"dotx","application/vnd.openxmlformats-officedocument.wordprocessingml.template"},
    {"docm","application/vnd.ms-word.document.macroEnabled.12"},
    {"dotm","application/vnd.ms-word.template.macroEnabled.12"},
    {"dtd","application/xml-dtd"},
    {"dv","video/x-dv"},
    {"dvi","application/x-dvi"},
    {"dxr","application/x-director"},
    {"eps","application/postscript"},
    {"etx","text/x-setext"},
    {"exe","application/octet-stream"},
    {"ez","application/andrew-inset"},
    {"gif","image/gif"},
    {"gram","application/srgs"},
    {"grxml","application/srgs+xml"},
    {"gtar","application/x-gtar"},
    {"hdf","application/x-hdf"},
    {"hqx","application/mac-binhex40"},
    {"htm","text/html"},
    {"html","text/html"},
    {"ice","x-conference/x-cooltalk"},
    {"ico","image/x-icon"},
    {"ics","text/calendar"},
    {"ief","image/ief"},
    {"ifb","text/calendar"},
    {"iges","model/iges"},
    {"igs","model/iges"},
    {"jnlp","application/x-java-jnlp-file"},
    {"jp2","image/jp2"},
    {"jpe","image/jpeg"},
    {"jpeg","image/jpeg"},
    {"jpg","image/jpeg"},
    {"js","application/x-javascript"},
    {"kar","audio/midi"},
    {"latex","application/x-latex"},
    {"lha","application/octet-stream"},
    {"lzh","application/octet-stream"},
    {"m3u","audio/x-mpegurl"},
    {"m4a","audio/mp4a-latm"},
    {"m4b","audio/mp4a-latm"},
    {"m4p","audio/mp4a-latm"},
    {"m4u","video/vnd.mpegurl"},
    {"m4v","video/x-m4v"},
    {"mac","image/x-macpaint"},
    {"man","application/x-troff-man"},
    {"mathml","application/mathml+xml"},
    {"me","application/x-troff-me"},
    {"mesh","model/mesh"},
    {"mid","audio/midi"},
    {"midi","audio/midi"},
    {"mif","application/vnd.mif"},
    {"mov","video/quicktime"},
    {"movie","video/x-sgi-movie"},
    {"mp2","audio/mpeg"},
    {"mp3","audio/mpeg"},
    {"mp4","video/mp4"},
    {"mpe","video/mpeg"},
    {"mpeg","video/mpeg"},
    {"mpg","video/mpeg"},
    {"mpga","audio/mpeg"},
    {"ms","application/x-troff-ms"},
    {"msh","model/mesh"},
    {"mxu","video/vnd.mpegurl"},
    {"nc","application/x-netcdf"},
    {"oda","application/oda"},
    {"ogg","application/ogg"},
    {"pbm","image/x-portable-bitmap"},
    {"pct","image/pict"},
    {"pdb","chemical/x-pdb"},
    {"pdf","application/pdf"},
    {"pgm","image/x-portable-graymap"},
    {"pgn","application/x-chess-pgn"},
    {"pic","image/pict"},
    {"pict","image/pict"},
    {"png","image/png"},
    {"pnm","image/x-portable-anymap"},
    {"pnt","image/x-macpaint"},
    {"pntg","image/x-macpaint"},
    {"ppm","image/x-portable-pixmap"},
    {"ppt","application/vnd.ms-powerpoint"},
    {"pptx","application/vnd.openxmlformats-officedocument.presentationml.presentation"},
    {"potx","application/vnd.openxmlformats-officedocument.presentationml.template"},
    {"ppsx","application/vnd.openxmlformats-officedocument.presentationml.slideshow"},
    {"ppam","application/vnd.ms-powerpoint.addin.macroEnabled.12"},
    {"pptm","application/vnd.ms-powerpoint.presentation.macroEnabled.12"},
    {"potm","application/vnd.ms-powerpoint.template.macroEnabled.12"},
    {"ppsm","application/vnd.ms-powerpoint.slideshow.macroEnabled.12"},
    {"ps","application/postscript"},
    {"qt","video/quicktime"},
    {"qti","image/x-quicktime"},
    {"qtif","image/x-quicktime"},
    {"ra","audio/x-pn-realaudio"},
    {"ram","audio/x-pn-realaudio"},
    {"ras","image/x-cmu-raster"},
    {"rdf","application/rdf+xml"},
    {"rgb","image/x-rgb"},
    {"rm","application/vnd.rn-realmedia"},
    {"roff","application/x-troff"},
    {"rtf","text/rtf"},
    {"rtx","text/richtext"},
    {"sgm","text/sgml"},
    {"sgml","text/sgml"},
    {"sh","application/x-sh"},
    {"shar","application/x-shar"},
    {"silo","model/mesh"},
    {"sit","application/x-stuffit"},
    {"skd","application/x-koan"},
    {"skm","application/x-koan"},
    {"skp","application/x-koan"},
    {"skt","application/x-koan"},
    {"smi","application/smil"},
    {"smil","application/smil"},
    {"snd","audio/basic"},
    {"so","application/octet-stream"},
    {"spl","application/x-futuresplash"},
    {"src","application/x-wais-source"},
    {"sv4cpio","application/x-sv4cpio"},
    {"sv4crc","application/x-sv4crc"},
    {"svg","image/svg+xml"},
    {"swf","application/x-shockwave-flash"},
    {"t","application/x-troff"},
    {"tar","application/x-tar"},
    {"tcl","application/x-tcl"},
    {"tex","application/x-tex"},
    {"texi","application/x-texinfo"},
    {"texinfo","application/x-texinfo"},
    {"tif","image/tiff"},
    {"tiff","image/tiff"},
    {"tr","application/x-troff"},
    {"tsv","text/tab-separated-values"},
    {"txt","text/plain"},
    {"ustar","application/x-ustar"},
    {"vcd","application/x-cdlink"},
    {"vrml","model/vrml"},
    {"vxml","application/voicexml+xml"},
    {"wav","audio/x-wav"},
    {"wbmp","image/vnd.wap.wbmp"},
    // "wbmxl" was a typo for "wbxml"; the misspelt key is kept so existing lookups keep working.
    {"wbmxl","application/vnd.wap.wbxml"},
    {"wbxml","application/vnd.wap.wbxml"},
    {"wml","text/vnd.wap.wml"},
    {"wmlc","application/vnd.wap.wmlc"},
    {"wmls","text/vnd.wap.wmlscript"},
    {"wmlsc","application/vnd.wap.wmlscriptc"},
    {"wrl","model/vrml"},
    {"xbm","image/x-xbitmap"},
    {"xht","application/xhtml+xml"},
    {"xhtml","application/xhtml+xml"},
    {"xls","application/vnd.ms-excel"},
    {"xml","application/xml"},
    {"xpm","image/x-xpixmap"},
    {"xsl","application/xml"},
    {"xlsx","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"},
    {"xltx","application/vnd.openxmlformats-officedocument.spreadsheetml.template"},
    {"xlsm","application/vnd.ms-excel.sheet.macroEnabled.12"},
    {"xltm","application/vnd.ms-excel.template.macroEnabled.12"},
    {"xlam","application/vnd.ms-excel.addin.macroEnabled.12"},
    {"xlsb","application/vnd.ms-excel.sheet.binary.macroEnabled.12"},
    {"xslt","application/xslt+xml"},
    {"xul","application/vnd.mozilla.xul+xml"},
    {"xwd","image/x-xwindowdump"},
    {"xyz","chemical/x-xyz"},
    {"zip","application/zip"}
  };

  /// <summary>
  /// Returns the MIME type registered for the extension of <paramref name="fileName"/>.
  /// </summary>
  /// <param name="fileName">File name or path; may be null or have no extension.</param>
  /// <returns>
  /// The mapped MIME type, or <c>"unknown/unknown"</c> when the name is null,
  /// has no extension, or the extension is not in the table.
  /// </returns>
  public static string GetMIMEType(string fileName)
  {
    // Path.GetExtension returns null for a null path and "" when there is no extension.
    string extension = Path.GetExtension(fileName);
    if (string.IsNullOrEmpty(extension) || extension.Length < 2)
    {
      return "unknown/unknown";
    }

    // Drop the leading dot and normalise case to match the table keys.
    string key = extension.Substring(1).ToLowerInvariant();

    string mimeType;
    if (MIMETypesDictionary.TryGetValue(key, out mimeType))
    {
      return mimeType;
    }
    return "unknown/unknown";
  }
}


编辑:直接使用 Mime Detective(Mime-Detective)库即可。

我使用字节数组序列来确定给定文件的正确MIME类型。与仅查看文件名的文件扩展名相比,这样做的好处是,如果用户重命名文件以绕过某些文件类型上载限制,则文件名扩展名将无法捕获此文件。另一方面,通过字节数组获取文件签名将阻止这种调皮的行为的发生。

这是C#中的示例:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/// <summary>
/// Guesses a MIME type from the leading bytes ("magic number") of a file,
/// using the file-name extension only to disambiguate container formats
/// that share one signature (OGG, RIFF, ASF, ZIP).
/// </summary>
public class MimeType
{
    private static readonly byte[] BMP = { 66, 77 };
    private static readonly byte[] DOC = { 208, 207, 17, 224, 161, 177, 26, 225 };
    private static readonly byte[] EXE_DLL = { 77, 90 };
    private static readonly byte[] GIF = { 71, 73, 70, 56 };
    private static readonly byte[] ICO = { 0, 0, 1, 0 };
    private static readonly byte[] JPG = { 255, 216, 255 };
    private static readonly byte[] MP3 = { 255, 251, 48 };
    private static readonly byte[] OGG = { 79, 103, 103, 83, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0 };
    private static readonly byte[] PDF = { 37, 80, 68, 70, 45, 49, 46 };
    private static readonly byte[] PNG = { 137, 80, 78, 71, 13, 10, 26, 10, 0, 0, 0, 13, 73, 72, 68, 82 };
    private static readonly byte[] RAR = { 82, 97, 114, 33, 26, 7, 0 };
    private static readonly byte[] SWF = { 70, 87, 83 };
    private static readonly byte[] TIFF = { 73, 73, 42, 0 };      // "II*\0" - little-endian TIFF
    private static readonly byte[] TIFF_BE = { 77, 77, 0, 42 };   // "MM\0*" - big-endian TIFF
    private static readonly byte[] TORRENT = { 100, 56, 58, 97, 110, 110, 111, 117, 110, 99, 101 };
    private static readonly byte[] TTF = { 0, 1, 0, 0, 0 };
    private static readonly byte[] WAV_AVI = { 82, 73, 70, 70 };
    private static readonly byte[] WMV_WMA = { 48, 38, 178, 117, 142, 102, 207, 17, 166, 217, 0, 170, 0, 98, 206, 108 };
    private static readonly byte[] ZIP_DOCX = { 80, 75, 3, 4 };

    /// <summary>
    /// Determines the MIME type of a file from its leading bytes.
    /// </summary>
    /// <param name="file">File content (only the first 16 bytes are inspected).</param>
    /// <param name="fileName">
    /// File name; its extension selects between formats that share a signature.
    /// </param>
    /// <returns>
    /// The detected MIME type, or <c>"application/octet-stream"</c> when
    /// <paramref name="fileName"/> is null/blank or no signature matches.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="file"/> is null (and <paramref name="fileName"/> is not blank).
    /// </exception>
    public static string GetMimeType(byte[] file, string fileName)
    {
        string mime = "application/octet-stream"; //DEFAULT UNKNOWN MIME TYPE

        //Ensure that the filename isn't empty or null
        if (string.IsNullOrWhiteSpace(fileName))
        {
            return mime;
        }

        if (file == null)
        {
            throw new System.ArgumentNullException("file");
        }

        //Get the file extension (compared ordinally so the result does not
        //depend on the current culture, e.g. Turkish dotted/dotless 'i')
        string extension = Path.GetExtension(fileName) ?? string.Empty;

        //Get the MIME Type - the order of the checks is significant
        if (StartsWith(file, BMP))
        {
            mime = "image/bmp";
        }
        else if (StartsWith(file, DOC))
        {
            mime = "application/msword";
        }
        else if (StartsWith(file, EXE_DLL))
        {
            mime = "application/x-msdownload"; //both use same mime type
        }
        else if (StartsWith(file, GIF))
        {
            mime = "image/gif";
        }
        else if (StartsWith(file, ICO))
        {
            mime = "image/x-icon";
        }
        else if (StartsWith(file, JPG))
        {
            mime = "image/jpeg";
        }
        else if (StartsWith(file, MP3))
        {
            mime = "audio/mpeg";
        }
        else if (StartsWith(file, OGG))
        {
            if (HasExtension(extension, ".OGX"))
            {
                mime = "application/ogg";
            }
            else if (HasExtension(extension, ".OGA"))
            {
                mime = "audio/ogg";
            }
            else
            {
                mime = "video/ogg";
            }
        }
        else if (StartsWith(file, PDF))
        {
            mime = "application/pdf";
        }
        else if (StartsWith(file, PNG))
        {
            mime = "image/png";
        }
        else if (StartsWith(file, RAR))
        {
            mime = "application/x-rar-compressed";
        }
        else if (StartsWith(file, SWF))
        {
            mime = "application/x-shockwave-flash";
        }
        else if (StartsWith(file, TIFF) || StartsWith(file, TIFF_BE))
        {
            mime = "image/tiff";
        }
        else if (StartsWith(file, TORRENT))
        {
            mime = "application/x-bittorrent";
        }
        else if (StartsWith(file, TTF))
        {
            mime = "application/x-font-ttf";
        }
        else if (StartsWith(file, WAV_AVI))
        {
            mime = HasExtension(extension, ".AVI") ? "video/x-msvideo" : "audio/x-wav";
        }
        else if (StartsWith(file, WMV_WMA))
        {
            mime = HasExtension(extension, ".WMA") ? "audio/x-ms-wma" : "video/x-ms-wmv";
        }
        else if (StartsWith(file, ZIP_DOCX))
        {
            //DOCX is a ZIP container, so only the extension can tell them apart
            mime = HasExtension(extension, ".DOCX") ? "application/vnd.openxmlformats-officedocument.wordprocessingml.document" : "application/x-zip-compressed";
        }

        return mime;
    }

    // True when data begins with every byte of signature; data shorter than the signature never matches.
    private static bool StartsWith(byte[] data, byte[] signature)
    {
        if (data.Length < signature.Length)
        {
            return false;
        }

        for (int i = 0; i < signature.Length; i++)
        {
            if (data[i] != signature[i])
            {
                return false;
            }
        }

        return true;
    }

    // Culture-independent, case-insensitive extension comparison.
    private static bool HasExtension(string extension, string expected)
    {
        return string.Equals(extension, expected, System.StringComparison.OrdinalIgnoreCase);
    }
}

注意,由于DOCX实际上只是一个ZIP文件,因此我对DOCX文件类型的处理方式有所不同。在这种情况下,一旦字节序列验证通过,我只需再检查一下文件扩展名即可。对于某些人来说,该示例还远远不够完整,但是您可以轻松添加自己需要的类型。

如果要添加更多的MIME类型,则可以从此处获取许多不同文件类型的字节数组序列。此外,这是有关文件签名的另一个很好的资源。

如果所有其他操作都失败了,我经常要做的就是逐步浏览我正在寻找的特定类型的几个文件,并在文件的字节序列中寻找模式。最后,这仍然是基本验证,不能用于100%确定文件类型的证明。


在Urlmon.dll中,有一个名为FindMimeFromData的函数。

从文档中

MIME type detection, or"data sniffing," refers to the process of determining an appropriate MIME type from binary data. The final result depends on a combination of server-supplied MIME type headers, file extension, and/or the data itself. Usually, only the first 256 bytes of data are significant.

因此,从文件中读取第一个(最多)256个字节,并将其传递给FindMimeFromData


如果您使用的是.NET Framework 4.5或更高版本,则现在有一个MimeMapping.GetMimeMapping(filename)方法,该方法将为传递的文件名返回具有正确Mime映射的字符串。请注意,这使用文件扩展名,而不是文件本身中的数据。

文档位于http://msdn.microsoft.com/zh-cn/library/system.web.mimemapping.getmimemapping


您也可以在注册表中查找。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
    using System.IO;
    using Microsoft.Win32;

    string GetMimeType(FileInfo fileInfo)
    {
        string mimeType ="application/unknown";

        RegistryKey regKey = Registry.ClassesRoot.OpenSubKey(
            fileInfo.Extension.ToLower()
            );

        if(regKey != null)
        {
            object contentType = regKey.GetValue("Content Type");

            if(contentType != null)
                mimeType = contentType.ToString();
        }

        return mimeType;
    }

无论采用哪种方式,您都必须访问某个MIME数据库——至于是根据扩展名还是根据幻数(magic number)来映射,其实无关紧要——Windows注册表就是这样一个数据库。
不过,对于独立于平台的解决方案,您必须让这个数据库随代码一起分发(或作为独立的库提供)。


我使用混合解决方案:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
    using System.Runtime.InteropServices;

    [DllImport (@"urlmon.dll", CharSet = CharSet.Auto)]
    private extern static System.UInt32 FindMimeFromData(
        System.UInt32 pBC,
        [MarshalAs(UnmanagedType.LPStr)] System.String pwzUrl,
        [MarshalAs(UnmanagedType.LPArray)] byte[] pBuffer,
        System.UInt32 cbSize,
        [MarshalAs(UnmanagedType.LPStr)] System.String pwzMimeProposed,
        System.UInt32 dwMimeFlags,
        out System.UInt32 ppwzMimeOut,
        System.UInt32 dwReserverd
    );

    private string GetMimeFromRegistry (string Filename)
    {
        string mime ="application/octetstream";
        string ext = System.IO.Path.GetExtension(Filename).ToLower();
        Microsoft.Win32.RegistryKey rk = Microsoft.Win32.Registry.ClassesRoot.OpenSubKey(ext);
        if (rk != null && rk.GetValue("Content Type") != null)
            mime = rk.GetValue("Content Type").ToString();
        return mime;
    }

    public string GetMimeTypeFromFileAndRegistry (string filename)
    {
        if (!File.Exists(filename))
        {
           return GetMimeFromRegistry (filename);
        }

        byte[] buffer = new byte[256];

        using (FileStream fs = new FileStream(filename, FileMode.Open))
        {
            if (fs.Length >= 256)
                fs.Read(buffer, 0, 256);
            else
                fs.Read(buffer, 0, (int)fs.Length);
        }

        try
        {            
            System.UInt32 mimetype;

            FindMimeFromData(0, null, buffer, 256, null, 0, out mimetype, 0);

            System.IntPtr mimeTypePtr = new IntPtr(mimetype);

            string mime = Marshal.PtrToStringUni(mimeTypePtr);

            Marshal.FreeCoTaskMem(mimeTypePtr);

            if (string.IsNullOrWhiteSpace (mime) ||
                mime =="text/plain" || mime =="application/octet-stream")                    
            {
                return GetMimeFromRegistry (filename);
            }

            return mime;
        }
        catch (Exception e)
        {
            return GetMimeFromRegistry (filename);
        }
    }

如果要在非Windows环境中托管ASP.NET解决方案,则来自Nuget的HeyRed.Mime.MimeGuesser.GuessMimeType将是最终的解决方案。

仅依靠文件扩展名映射非常不安全。如果攻击者上传带有伪造扩展名的文件,基于映射字典的判断就可能(例如)让可执行文件伪装成.webp文件被分发出去。
因此,请始终使用内容嗅探库来了解真实的内容类型。

1
2
3
4
5
6
7
8
 public  static string MimeTypeFrom(byte[] dataBytes, string fileName)
 {
        var contentType = HeyRed.Mime.MimeGuesser.GuessMimeType(dataBytes);
        if (string.IsNullOrEmpty(contentType))
        {
            return HeyRed.Mime.MimeTypesMap.GetMimeType(fileName);
        }
  return contentType;

我认为正确的答案是史蒂夫·摩根(Steve Morgan)和塞尔吉(Serguei)两人答案的结合。这也是Internet Explorer的工作方式。通过P/Invoke调用FindMimeFromData只能识别26种硬编码的MIME类型。此外,即使可能存在更具体、更合适的MIME类型,它也可能只给出模糊的MIME类型(例如text/plain、application/octet-stream)。如果它无法提供足够好的MIME类型,则可以再到注册表中查找更具体的MIME类型。服务器的注册表中可能有更新的MIME类型。

请参阅:http://msdn.microsoft.com/en-us/library/ms775147(VS.85).aspx


此类综合了前面的几个答案,依次尝试3种不同的方式:基于扩展名的硬编码列表、FindMimeFromData API以及注册表。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.InteropServices;

using Microsoft.Win32;

namespace YourNamespace
{
    public static class MimeTypeParser
    {
        [DllImport(@"urlmon.dll", CharSet = CharSet.Auto)]
        private extern static System.UInt32 FindMimeFromData(
                System.UInt32 pBC,
                [MarshalAs(UnmanagedType.LPStr)] System.String pwzUrl,
                [MarshalAs(UnmanagedType.LPArray)] byte[] pBuffer,
                System.UInt32 cbSize,
                [MarshalAs(UnmanagedType.LPStr)] System.String pwzMimeProposed,
                System.UInt32 dwMimeFlags,
                out System.UInt32 ppwzMimeOut,
                System.UInt32 dwReserverd
        );

        public static string GetMimeType(string sFilePath)
        {
            string sMimeType = GetMimeTypeFromList(sFilePath);

            if (String.IsNullOrEmpty(sMimeType))
            {
                sMimeType = GetMimeTypeFromFile(sFilePath);

                if (String.IsNullOrEmpty(sMimeType))
                {
                    sMimeType = GetMimeTypeFromRegistry(sFilePath);
                }
            }

            return sMimeType;
        }

        public static string GetMimeTypeFromList(string sFileNameOrPath)
        {
            string sMimeType = null;
            string sExtensionWithoutDot = Path.GetExtension(sFileNameOrPath).Substring(1).ToLower();

            if (!String.IsNullOrEmpty(sExtensionWithoutDot) && spDicMIMETypes.ContainsKey(sExtensionWithoutDot))
            {
                sMimeType = spDicMIMETypes[sExtensionWithoutDot];
            }

            return sMimeType;
        }

        public static string GetMimeTypeFromRegistry(string sFileNameOrPath)
        {
            string sMimeType = null;
            string sExtension = Path.GetExtension(sFileNameOrPath).ToLower();
            RegistryKey pKey = Registry.ClassesRoot.OpenSubKey(sExtension);

            if (pKey != null && pKey.GetValue("Content Type") != null)
            {
                sMimeType = pKey.GetValue("Content Type").ToString();
            }

            return sMimeType;
        }

        public static string GetMimeTypeFromFile(string sFilePath)
        {
            string sMimeType = null;

            if (File.Exists(sFilePath))
            {
                byte[] abytBuffer = new byte[256];

                using (FileStream pFileStream = new FileStream(sFilePath, FileMode.Open))
                {
                    if (pFileStream.Length >= 256)
                    {
                        pFileStream.Read(abytBuffer, 0, 256);
                    }
                    else
                    {
                        pFileStream.Read(abytBuffer, 0, (int)pFileStream.Length);
                    }
                }

                try
                {
                    UInt32 unMimeType;

                    FindMimeFromData(0, null, abytBuffer, 256, null, 0, out unMimeType, 0);

                    IntPtr pMimeType = new IntPtr(unMimeType);
                    string sMimeTypeFromFile = Marshal.PtrToStringUni(pMimeType);

                    Marshal.FreeCoTaskMem(pMimeType);

                    if (!String.IsNullOrEmpty(sMimeTypeFromFile) && sMimeTypeFromFile !="text/plain" && sMimeTypeFromFile !="application/octet-stream")
                    {
                        sMimeType = sMimeTypeFromFile;
                    }
                }
                catch {}
            }

            return sMimeType;
        }

        private static readonly Dictionary<string, string> spDicMIMETypes = new Dictionary<string, string>
        {
            {"ai","application/postscript"},
            {"aif","audio/x-aiff"},
            {"aifc","audio/x-aiff"},
            {"aiff","audio/x-aiff"},
            {"asc","text/plain"},
            {"atom","application/atom+xml"},
            {"au","audio/basic"},
            {"avi","video/x-msvideo"},
            {"bcpio","application/x-bcpio"},
            {"bin","application/octet-stream"},
            {"bmp","image/bmp"},
            {"cdf","application/x-netcdf"},
            {"cgm","image/cgm"},
            {"class","application/octet-stream"},
            {"cpio","application/x-cpio"},
            {"cpt","application/mac-compactpro"},
            {"csh","application/x-csh"},
            {"css","text/css"},
            {"dcr","application/x-director"},
            {"dif","video/x-dv"},
            {"dir","application/x-director"},
            {"djv","image/vnd.djvu"},
            {"djvu","image/vnd.djvu"},
            {"dll","application/octet-stream"},
            {"dmg","application/octet-stream"},
            {"dms","application/octet-stream"},
            {"doc","application/msword"},
            {"docx","application/vnd.openxmlformats-officedocument.wordprocessingml.document"},
            {"dotx","application/vnd.openxmlformats-officedocument.wordprocessingml.template"},
            {"docm","application/vnd.ms-word.document.macroEnabled.12"},
            {"dotm","application/vnd.ms-word.template.macroEnabled.12"},
            {"dtd","application/xml-dtd"},
            {"dv","video/x-dv"},
            {"dvi","application/x-dvi"},
            {"dxr","application/x-director"},
            {"eps","application/postscript"},
            {"etx","text/x-setext"},
            {"exe","application/octet-stream"},
            {"ez","application/andrew-inset"},
            {"gif","image/gif"},
            {"gram","application/srgs"},
            {"grxml","application/srgs+xml"},
            {"gtar","application/x-gtar"},
            {"hdf","application/x-hdf"},
            {"hqx","application/mac-binhex40"},
            {"htc","text/x-component"},
            {"htm","text/html"},
            {"html","text/html"},
            {"ice","x-conference/x-cooltalk"},
            {"ico","image/x-icon"},
            {"ics","text/calendar"},
            {"ief","image/ief"},
            {"ifb","text/calendar"},
            {"iges","model/iges"},
            {"igs","model/iges"},
            {"jnlp","application/x-java-jnlp-file"},
            {"jp2","image/jp2"},
            {"jpe","image/jpeg"},
            {"jpeg","image/jpeg"},
            {"jpg","image/jpeg"},
            {"js","application/x-javascript"},
            {"kar","audio/midi"},
            {"latex","application/x-latex"},
            {"lha","application/octet-stream"},
            {"lzh","application/octet-stream"},
            {"m3u","audio/x-mpegurl"},
            {"m4a","audio/mp4a-latm"},
            {"m4b","audio/mp4a-latm"},
            {"m4p","audio/mp4a-latm"},
            {"m4u","video/vnd.mpegurl"},
            {"m4v","video/x-m4v"},
            {"mac","image/x-macpaint"},
            {"man","application/x-troff-man"},
            {"mathml","application/mathml+xml"},
            {"me","application/x-troff-me"},
            {"mesh","model/mesh"},
            {"mid","audio/midi"},
            {"midi","audio/midi"},
            {"mif","application/vnd.mif"},
            {"mov","video/quicktime"},
            {"movie","video/x-sgi-movie"},
            {"mp2","audio/mpeg"},
            {"mp3","audio/mpeg"},
            {"mp4","video/mp4"},
            {"mpe","video/mpeg"},
            {"mpeg","video/mpeg"},
            {"mpg","video/mpeg"},
            {"mpga","audio/mpeg"},
            {"ms","application/x-troff-ms"},
            {"msh","model/mesh"},
            {"mxu","video/vnd.mpegurl"},
            {"nc","application/x-netcdf"},
            {"oda","application/oda"},
            {"ogg","application/ogg"},
            {"pbm","image/x-portable-bitmap"},
            {"pct","image/pict"},
            {"pdb","chemical/x-pdb"},
            {"pdf","application/pdf"},
            {"pgm","image/x-portable-graymap"},
            {"pgn","application/x-chess-pgn"},
            {"pic","image/pict"},
            {"pict","image/pict"},
            {"png","image/png"},
            {"pnm","image/x-portable-anymap"},
            {"pnt","image/x-macpaint"},
            {"pntg","image/x-macpaint"},
            {"ppm","image/x-portable-pixmap"},
            {"ppt","application/vnd.ms-powerpoint"},
            {"pptx","application/vnd.openxmlformats-officedocument.presentationml.presentation"},
            {"potx","application/vnd.openxmlformats-officedocument.presentationml.template"},
            {"ppsx","application/vnd.openxmlformats-officedocument.presentationml.slideshow"},
            {"ppam","application/vnd.ms-powerpoint.addin.macroEnabled.12"},
            {"pptm","application/vnd.ms-powerpoint.presentation.macroEnabled.12"},
            {"potm","application/vnd.ms-powerpoint.template.macroEnabled.12"},
            {"ppsm","application/vnd.ms-powerpoint.slideshow.macroEnabled.12"},
            {"ps","application/postscript"},
            {"qt","video/quicktime"},
            {"qti","image/x-quicktime"},
            {"qtif","image/x-quicktime"},
            {"ra","audio/x-pn-realaudio"},
            {"ram","audio/x-pn-realaudio"},
            {"ras","image/x-cmu-raster"},
            {"rdf","application/rdf+xml"},
            {"rgb","image/x-rgb"},
            {"rm","application/vnd.rn-realmedia"},
            {"roff","application/x-troff"},
            {"rtf","text/rtf"},
            {"rtx","text/richtext"},
            {"sgm","text/sgml"},
            {"sgml","text/sgml"},
            {"sh","application/x-sh"},
            {"shar","application/x-shar"},
            {"silo","model/mesh"},
            {"sit","application/x-stuffit"},
            {"skd","application/x-koan"},
            {"skm","application/x-koan"},
            {"skp","application/x-koan"},
            {"skt","application/x-koan"},
            {"smi","application/smil"},
            {"smil","application/smil"},
            {"snd","audio/basic"},
            {"so","application/octet-stream"},
            {"spl","application/x-futuresplash"},
            {"src","application/x-wais-source"},
            {"sv4cpio","application/x-sv4cpio"},
            {"sv4crc","application/x-sv4crc"},
            {"svg","image/svg+xml"},
            {"swf","application/x-shockwave-flash"},
            {"t","application/x-troff"},
            {"tar","application/x-tar"},
            {"tcl","application/x-tcl"},
            {"tex","application/x-tex"},
            {"texi","application/x-texinfo"},
            {"texinfo","application/x-texinfo"},
            {"tif","image/tiff"},
            {"tiff","image/tiff"},
            {"tr","application/x-troff"},
            {"tsv","text/tab-separated-values"},
            {"txt","text/plain"},
            {"ustar","application/x-ustar"},
            {"vcd","application/x-cdlink"},
            {"vrml","model/vrml"},
            {"vxml","application/voicexml+xml"},
            {"wav","audio/x-wav"},
            {"wbmp","image/vnd.wap.wbmp"},
            {"wbmxl","application/vnd.wap.wbxml"},
            {"wml","text/vnd.wap.wml"},
            {"wmlc","application/vnd.wap.wmlc"},
            {"wmls","text/vnd.wap.wmlscript"},
            {"wmlsc","application/vnd.wap.wmlscriptc"},
            {"wrl","model/vrml"},
            {"xbm","image/x-xbitmap"},
            {"xht","application/xhtml+xml"},
            {"xhtml","application/xhtml+xml"},
            {"xls","application/vnd.ms-excel"},                                                
            {"xml","application/xml"},
            {"xpm","image/x-xpixmap"},
            {"xsl","application/xml"},
            {"xlsx","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"},
            {"xltx","application/vnd.openxmlformats-officedocument.spreadsheetml.template"},
            {"xlsm","application/vnd.ms-excel.sheet.macroEnabled.12"},
            {"xltm","application/vnd.ms-excel.template.macroEnabled.12"},
            {"xlam","application/vnd.ms-excel.addin.macroEnabled.12"},
            {"xlsb","application/vnd.ms-excel.sheet.binary.macroEnabled.12"},
            {"xslt","application/xslt+xml"},
            {"xul","application/vnd.mozilla.xul+xml"},
            {"xwd","image/x-xwindowdump"},
            {"xyz","chemical/x-xyz"},
            {"zip","application/zip"}
        };
    }
}

我发现这很有用。
对于VB.NET开发人员:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
    Public Shared Function GetFromFileName(ByVal fileName As String) As String
        Return GetFromExtension(Path.GetExtension(fileName).Remove(0, 1))
    End Function

    Public Shared Function GetFromExtension(ByVal extension As String) As String
        If extension.StartsWith("."c) Then
            extension = extension.Remove(0, 1)
        End If

        If MIMETypesDictionary.ContainsKey(extension) Then
            Return MIMETypesDictionary(extension)
        End If

        Return"unknown/unknown"
    End Function

    Private Shared ReadOnly MIMETypesDictionary As New Dictionary(Of String, String)() From { _
         {"ai","application/postscript"}, _
         {"aif","audio/x-aiff"}, _
         {"aifc","audio/x-aiff"}, _
         {"aiff","audio/x-aiff"}, _
         {"asc","text/plain"}, _
         {"atom","application/atom+xml"}, _
         {"au","audio/basic"}, _
         {"avi","video/x-msvideo"}, _
         {"bcpio","application/x-bcpio"}, _
         {"bin","application/octet-stream"}, _
         {"bmp","image/bmp"}, _
         {"cdf","application/x-netcdf"}, _
         {"cgm","image/cgm"}, _
         {"class","application/octet-stream"}, _
         {"cpio","application/x-cpio"}, _
         {"cpt","application/mac-compactpro"}, _
         {"csh","application/x-csh"}, _
         {"css","text/css"}, _
         {"dcr","application/x-director"}, _
         {"dif","video/x-dv"}, _
         {"dir","application/x-director"}, _
         {"djv","image/vnd.djvu"}, _
         {"djvu","image/vnd.djvu"}, _
         {"dll","application/octet-stream"}, _
         {"dmg","application/octet-stream"}, _
         {"dms","application/octet-stream"}, _
         {"doc","application/msword"}, _
         {"dtd","application/xml-dtd"}, _
         {"dv","video/x-dv"}, _
         {"dvi","application/x-dvi"}, _
         {"dxr","application/x-director"}, _
         {"eps","application/postscript"}, _
         {"etx","text/x-setext"}, _
         {"exe","application/octet-stream"}, _
         {"ez","application/andrew-inset"}, _
         {"gif","image/gif"}, _
         {"gram","application/srgs"}, _
         {"grxml","application/srgs+xml"}, _
         {"gtar","application/x-gtar"}, _
         {"hdf","application/x-hdf"}, _
         {"hqx","application/mac-binhex40"}, _
         {"htm","text/html"}, _
         {"html","text/html"}, _
         {"ice","x-conference/x-cooltalk"}, _
         {"ico","image/x-icon"}, _
         {"ics","text/calendar"}, _
         {"ief","image/ief"}, _
         {"ifb","text/calendar"}, _
         {"iges","model/iges"}, _
         {"igs","model/iges"}, _
         {"jnlp","application/x-java-jnlp-file"}, _
         {"jp2","image/jp2"}, _
         {"jpe","image/jpeg"}, _
         {"jpeg","image/jpeg"}, _
         {"jpg","image/jpeg"}, _
         {"js","application/x-javascript"}, _
         {"kar","audio/midi"}, _
         {"latex","application/x-latex"}, _
         {"lha","application/octet-stream"}, _
         {"lzh","application/octet-stream"}, _
         {"m3u","audio/x-mpegurl"}, _
         {"m4a","audio/mp4a-latm"}, _
         {"m4b","audio/mp4a-latm"}, _
         {"m4p","audio/mp4a-latm"}, _
         {"m4u","video/vnd.mpegurl"}, _
         {"m4v","video/x-m4v"}, _
         {"mac","image/x-macpaint"}, _
         {"man","application/x-troff-man"}, _
         {"mathml","application/mathml+xml"}, _
         {"me","application/x-troff-me"}, _
         {"mesh","model/mesh"}, _
         {"mid","audio/midi"}, _
         {"midi","audio/midi"}, _
         {"mif","application/vnd.mif"}, _
         {"mov","video/quicktime"}, _
         {"movie","video/x-sgi-movie"}, _
         {"mp2","audio/mpeg"}, _
         {"mp3","audio/mpeg"}, _
         {"mp4","video/mp4"}, _
         {"mpe","video/mpeg"}, _
         {"mpeg","video/mpeg"}, _
         {"mpg","video/mpeg"}, _
         {"mpga","audio/mpeg"}, _
         {"ms","application/x-troff-ms"}, _
         {"msh","model/mesh"}, _
         {"mxu","video/vnd.mpegurl"}, _
         {"nc","application/x-netcdf"}, _
         {"oda","application/oda"}, _
         {"ogg","application/ogg"}, _
         {"pbm","image/x-portable-bitmap"}, _
         {"pct","image/pict"}, _
         {"pdb","chemical/x-pdb"}, _
         {"pdf","application/pdf"}, _
         {"pgm","image/x-portable-graymap"}, _
         {"pgn","application/x-chess-pgn"}, _
         {"pic","image/pict"}, _
         {"pict","image/pict"}, _
         {"png","image/png"}, _
         {"pnm","image/x-portable-anymap"}, _
         {"pnt","image/x-macpaint"}, _
         {"pntg","image/x-macpaint"}, _
         {"ppm","image/x-portable-pixmap"}, _
         {"ppt","application/vnd.ms-powerpoint"}, _
         {"ps","application/postscript"}, _
         {"qt","video/quicktime"}, _
         {"qti","image/x-quicktime"}, _
         {"qtif","image/x-quicktime"}, _
         {"ra","audio/x-pn-realaudio"}, _
         {"ram","audio/x-pn-realaudio"}, _
         {"ras","image/x-cmu-raster"}, _
         {"rdf","application/rdf+xml"}, _
         {"rgb","image/x-rgb"}, _
         {"rm","application/vnd.rn-realmedia"}, _
         {"roff","application/x-troff"}, _
         {"rtf","text/rtf"}, _
         {"rtx","text/richtext"}, _
         {"sgm","text/sgml"}, _
         {"sgml","text/sgml"}, _
         {"sh","application/x-sh"}, _
         {"shar","application/x-shar"}, _
         {"silo","model/mesh"}, _
         {"sit","application/x-stuffit"}, _
         {"skd","application/x-koan"}, _
         {"skm","application/x-koan"}, _
         {"skp","application/x-koan"}, _
         {"skt","application/x-koan"}, _
         {"smi","application/smil"}, _
         {"smil","application/smil"}, _
         {"snd","audio/basic"}, _
         {"so","application/octet-stream"}, _
         {"spl","application/x-futuresplash"}, _
         {"src","application/x-wais-source"}, _
         {"sv4cpio","application/x-sv4cpio"}, _
         {"sv4crc","application/x-sv4crc"}, _
         {"svg","image/svg+xml"}, _
         {"swf","application/x-shockwave-flash"}, _
         {"t","application/x-troff"}, _
         {"tar","application/x-tar"}, _
         {"tcl","application/x-tcl"}, _
         {"tex","application/x-tex"}, _
         {"texi","application/x-texinfo"}, _
         {"texinfo","application/x-texinfo"}, _
         {"tif","image/tiff"}, _
         {"tiff","image/tiff"}, _
         {"tr","application/x-troff"}, _
         {"tsv","text/tab-separated-values"}, _
         {"txt","text/plain"}, _
         {"ustar","application/x-ustar"}, _
         {"vcd","application/x-cdlink"}, _
         {"vrml","model/vrml"}, _
         {"vxml","application/voicexml+xml"}, _
         {"wav","audio/x-wav"}, _
         {"wbmp","image/vnd.wap.wbmp"}, _
         {"wbmxl","application/vnd.wap.wbxml"}, _
         {"wml","text/vnd.wap.wml"}, _
         {"wmlc","application/vnd.wap.wmlc"}, _
         {"wmls","text/vnd.wap.wmlscript"}, _
         {"wmlsc","application/vnd.wap.wmlscriptc"}, _
         {"wrl","model/vrml"}, _
         {"xbm","image/x-xbitmap"}, _
         {"xht","application/xhtml+xml"}, _
         {"xhtml","application/xhtml+xml"}, _
         {"xls","application/vnd.ms-excel"}, _
         {"xml","application/xml"}, _
         {"xpm","image/x-xpixmap"}, _
         {"xsl","application/xml"}, _
         {"xslt","application/xslt+xml"}, _
         {"xul","application/vnd.mozilla.xul+xml"}, _
         {"xwd","image/x-xwindowdump"}, _
         {"xyz","chemical/x-xyz"}, _
         {"zip","application/zip"} _
        }

我遇到了同样的问题,最终选择了我自己改写的Kirk Baucom解决方案,可在此处找到。

在我看来,这是某人编写在线查询服务的机会。

无论如何,希望对您有所帮助。


如果有人愿意,可以将出色的Perl模块File::Type移植到.NET。该模块的代码针对每种文件类型使用一组文件头魔术数字查找或正则表达式匹配。

这是一个.NET文件类型检测库http://filetypedetective.codeplex.com/,但此刻它仅检测少量文件。


此答案是作者答案(Richard Gourlay)的副本,但根据Rohland的评论(指向 http://www.pinvoke.net/default.aspx/urlmon.findmimefromdata )进行了改进,以解决IIS 8 / Win2012上的问题(原函数会导致应用程序池崩溃)。

1
using System.Runtime.InteropServices;

...

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
/// <summary>
/// Detects the MIME type of a file from its leading bytes (at most 256)
/// using <c>FindMimeFromData</c> from urlmon.dll.
/// </summary>
/// <param name="filename">Path of the file to inspect.</param>
/// <returns>
/// The detected MIME type, or "unknown/unknown" when the native call reports
/// a failure or cannot be made.
/// </returns>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
public static string GetMimeFromFile(string filename)
{
    if (!File.Exists(filename))
    {
        throw new FileNotFoundException(filename +" not found");
    }

    const int maxContent = 256;

    var buffer = new byte[maxContent];
    var bytesRead = 0;

    // Read-only access is sufficient and also works for read-only files.
    using (var fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
    {
        int chunk;
        while (bytesRead < maxContent
               && (chunk = fs.Read(buffer, bytesRead, maxContent - bytesRead)) > 0)
        {
            bytesRead += chunk;
        }
    }

    var mimeTypePtr = IntPtr.Zero;
    try
    {
        // Pass the number of bytes actually read so zero padding is not sniffed as content.
        var result = FindMimeFromData(IntPtr.Zero, null, buffer, bytesRead, null, 0, out mimeTypePtr, 0);
        if (result != 0 || mimeTypePtr == IntPtr.Zero)
        {
            return"unknown/unknown";
        }

        return Marshal.PtrToStringUni(mimeTypePtr);
    }
    catch (Exception)
    {
        // Deliberately best-effort: any interop failure is reported as an unknown type.
        return"unknown/unknown";
    }
    finally
    {
        // Single release point; the previous version could free the pointer
        // once before throwing and again in the catch block.
        if (mimeTypePtr != IntPtr.Zero)
        {
            Marshal.FreeCoTaskMem(mimeTypePtr);
        }
    }
}

/// <summary>
/// P/Invoke declaration for the <c>FindMimeFromData</c> export of urlmon.dll.
/// </summary>
/// <remarks>
/// Pointer-sized arguments (<paramref name="pBC"/>, <paramref name="ppwzMimeOut"/>)
/// are declared as <see cref="IntPtr"/> and both string arguments are marshalled
/// as wide strings (LPWStr). The caller in this file treats a return value of 0
/// as success, reads the result with <c>Marshal.PtrToStringUni</c> and releases
/// it with <c>Marshal.FreeCoTaskMem</c>.
/// </remarks>
/// <param name="pBC">Passed as <c>IntPtr.Zero</c> by the caller in this file.</param>
/// <param name="pwzUrl">Passed as <c>null</c> by the caller in this file.</param>
/// <param name="pBuffer">Bytes to inspect; its length is tied to <paramref name="cbSize"/> through SizeParamIndex = 3.</param>
/// <param name="cbSize">Size argument for <paramref name="pBuffer"/>.</param>
/// <param name="pwzMimeProposed">Passed as <c>null</c> by the caller in this file.</param>
/// <param name="dwMimeFlags">Passed as 0 by the caller in this file.</param>
/// <param name="ppwzMimeOut">Receives a pointer to the resulting MIME type string.</param>
/// <param name="dwReserved">Passed as 0 by the caller in this file.</param>
/// <returns>An integer status code; the caller treats any non-zero value as failure.</returns>
[DllImport("urlmon.dll", CharSet = CharSet.Unicode, ExactSpelling = true, SetLastError = false)]
private static extern int FindMimeFromData(IntPtr pBC,
    [MarshalAs(UnmanagedType.LPWStr)] string pwzUrl,
    [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1, SizeParamIndex = 3)] byte[] pBuffer,
    int cbSize,
    [MarshalAs(UnmanagedType.LPWStr)] string pwzMimeProposed,
    int dwMimeFlags,
    out IntPtr ppwzMimeOut,
    int dwReserved);

@Steve Morgan和@Richard Gourlay,这是一个很好的解决方案,谢谢你们。一个小的缺点是,当文件中的字节数小于或等于255时,MIME类型有时会被判定为"application/octet-stream",这对于预期应得到"text/plain"的文件来说有点不准确。我已更新您的原始方法来处理这种情况,如下所示:

如果文件中的字节数小于或等于255,并且推导出的mime类型为"application/octet-stream",则创建一个新的字节数组:将原始文件字节重复n次,直到总字节数>=256。然后对该新字节数组重新检查mime类型。

修改方法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
Imports System.Runtime.InteropServices

' P/Invoke declaration for FindMimeFromData in urlmon.dll. Pointer-sized
' arguments are declared as IntPtr so the returned string pointer is not
' truncated in a 64-bit process, and string arguments are marshalled as wide
' strings. The return value is an HRESULT (0 means success).
<DllImport("urlmon.dll", CharSet:=CharSet.Unicode, ExactSpelling:=True, SetLastError:=False)> _
Private Shared Function FindMimeFromData(pBC As IntPtr, <MarshalAs(UnmanagedType.LPWStr)> pwzUrl As System.String, <MarshalAs(UnmanagedType.LPArray)> pBuffer As Byte(), cbSize As Integer, <MarshalAs(UnmanagedType.LPWStr)> pwzMimeProposed As System.String, dwMimeFlags As Integer, _
ByRef ppwzMimeOut As IntPtr, dwReserved As Integer) As Integer
End Function

' Runs FindMimeFromData over the whole array and returns the detected MIME
' type, or Nothing when the call reports a failure. The native string is
' always released, even if reading it throws.
Private Shared Function SniffMimeType(ByVal data As Byte()) As String
    Dim mimeTypePtr As IntPtr = IntPtr.Zero
    Try
        Dim hr As Integer = FindMimeFromData(IntPtr.Zero, Nothing, data, data.Length, Nothing, 0, mimeTypePtr, 0)
        If hr <> 0 OrElse mimeTypePtr = IntPtr.Zero Then
            Return Nothing
        End If
        Return Marshal.PtrToStringUni(mimeTypePtr)
    Finally
        If mimeTypePtr <> IntPtr.Zero Then
            Marshal.FreeCoTaskMem(mimeTypePtr)
        End If
    End Try
End Function

''' <summary>
''' Detects the MIME type of a file from its first 256 bytes. When a file
''' shorter than 256 bytes is reported as "application/octet-stream", its
''' content is repeated to fill the 256-byte buffer and the check is run again.
''' </summary>
''' <param name="f">The file to inspect.</param>
''' <returns>
''' The detected MIME type; "unknown/unknown" when detection fails or throws;
''' an empty string when the file is Nothing or does not exist.
''' </returns>
Private Function GetMimeType(ByVal f As FileInfo) As String
    'See http://stackoverflow.com/questions/58510/using-net-how-can-you-find-the-mime-type-of-a-file-based-on-the-file-signature
    Const sniffSize As Integer = 256
    Const ambiguousMimeType As String ="application/octet-stream"
    Const unknownMimeType As String ="unknown/unknown"

    If f Is Nothing OrElse Not f.Exists Then
        Return""
    End If

    Try
        Dim buffer As Byte() = New Byte(sniffSize - 1) {}
        Dim contentLength As Integer = 0

        Using fileStream As New FileStream(f.FullName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)
            Dim chunk As Integer
            Do
                chunk = fileStream.Read(buffer, contentLength, sniffSize - contentLength)
                contentLength += chunk
            Loop While chunk > 0 AndAlso contentLength < sniffSize
        End Using

        Dim mimeType As String = SniffMimeType(buffer)

        If String.Equals(mimeType, ambiguousMimeType, StringComparison.OrdinalIgnoreCase) AndAlso contentLength > 0 AndAlso contentLength < sniffSize Then
            ' Duplicate the stream content until the buffer is full to get a more deterministic mime type analysis.
            Dim repeated As Byte() = New Byte(sniffSize - 1) {}
            For i As Integer = 0 To sniffSize - 1
                repeated(i) = buffer(i Mod contentLength)
            Next
            mimeType = SniffMimeType(repeated)
        End If

        Return If(mimeType, unknownMimeType)
    Catch ex As Exception
        ' Best-effort: any I/O or interop failure is reported as an unknown type.
        Return unknownMimeType
    End Try
End Function

IIS 7或更高

使用此代码,但您需要成为服务器上的管理员

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/// <summary>
/// Checks whether the IIS static-content MIME map contains an entry for the
/// given file extension (compared case-insensitively).
/// </summary>
/// <param name="fileExtension">Extension to look for, in the form stored in the "fileExtension" attribute of the MIME map entries.</param>
/// <returns>
/// <c>true</c> when a matching entry exists; <c>false</c> when none matches,
/// the argument is null or empty, or the configuration cannot be read (the
/// error message is written to the console).
/// </returns>
public bool CheckMimeMapExtension(string fileExtension)
{
    if (string.IsNullOrEmpty(fileExtension))
    {
        return false;
    }

    try
    {
        using (ServerManager serverManager = new ServerManager())
        {
            // Load the application host configuration and its static-content MIME map.
            var config = serverManager.GetApplicationHostConfiguration();
            var staticContent = config.GetSection("system.webServer/staticContent");
            var mimeMap = staticContent.GetCollection();

            foreach (var mimeType in mimeMap)
            {
                if (string.Equals((String)mimeType["fileExtension"], fileExtension, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Report and fall through to "not found". The previous Console.Read()
        // is intentionally gone: it blocked the caller waiting for a key press.
        Console.WriteLine("An exception has occurred: \n{0}", ex.Message);
    }

    return false;
}


我写了一个MIME类型的验证器。请与您分享。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// Leading signature bytes expected for each supported image content type.
private readonly Dictionary<string, byte[]> _mimeTypes = new Dictionary<string, byte[]>
{
    // JPEG and its aliases share one three-byte prefix.
    { "image/jpeg",  new byte[] { 0xFF, 0xD8, 0xFF } },
    { "image/jpg",   new byte[] { 0xFF, 0xD8, 0xFF } },
    { "image/pjpeg", new byte[] { 0xFF, 0xD8, 0xFF } },

    // PNG and APNG share one sixteen-byte prefix.
    { "image/apng",  new byte[] { 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52 } },
    { "image/png",   new byte[] { 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52 } },

    { "image/bmp",   new byte[] { 0x42, 0x4D } },
    { "image/gif",   new byte[] { 0x47, 0x49, 0x46, 0x38 } },
};

/// <summary>
/// Checks that the file content starts with the signature registered in
/// <c>_mimeTypes</c> for the declared content type.
/// </summary>
/// <param name="file">Raw file content.</param>
/// <param name="contentType">Declared content type; must match a key of <c>_mimeTypes</c> exactly.</param>
/// <returns>
/// <c>true</c> when the content begins with the expected signature;
/// <c>false</c> when it does not, when the content type is not registered,
/// or when either argument is null.
/// </returns>
private bool ValidateMimeType(byte[] file, string contentType)
{
    if (file == null || contentType == null)
    {
        return false;
    }

    // An unregistered content type used to surface as a NullReferenceException.
    byte[] signature;
    if (!_mimeTypes.TryGetValue(contentType, out signature))
    {
        return false;
    }

    if (file.Length < signature.Length)
    {
        return false;
    }

    for (int i = 0; i < signature.Length; i++)
    {
        if (file[i] != signature[i])
        {
            return false;
        }
    }

    return true;
}

当使用Windows Azure Web角色,或使用任何其他在"有限信任"下运行您的应用程序的主机时,请不要忘记:您将无法访问注册表或非托管代码。混合方法(将带try-catch的注册表查询与内存中字典结合起来)看起来是一个不错的解决方案,可以覆盖所有情况。

我用下面的代码来做到这一点:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
/// <summary>
/// Resolves MIME types by trying, in order: a built-in extension table, the
/// "Content Type" value stored in the registry under HKEY_CLASSES_ROOT, and finally
/// content sniffing through urlmon.dll's FindMimeFromData.
/// </summary>
public class DefaultMimeResolver : IMimeResolver
{
    // Number of leading bytes handed to FindMimeFromData for content sniffing.
    private const int SniffBufferSize = 256;

    private readonly IFileRepository _fileRepository;

    /// <summary>Creates a resolver that reads file content through <paramref name="fileRepository"/>.</summary>
    /// <param name="fileRepository">Repository used to open files for content sniffing.</param>
    public DefaultMimeResolver(IFileRepository fileRepository)
    {
        _fileRepository = fileRepository;
    }

    // The out parameter receives a pointer, so it must be IntPtr: a 32-bit integer truncates
    // it in a 64-bit process. The string parameters are wide strings, hence LPWStr.
    [DllImport("urlmon.dll", CharSet = CharSet.Unicode, ExactSpelling = true)]
    private static extern int FindMimeFromData(
        IntPtr pBC,
        [MarshalAs(UnmanagedType.LPWStr)] string pwzUrl,
        [MarshalAs(UnmanagedType.LPArray)] byte[] pBuffer,
        int cbSize,
        [MarshalAs(UnmanagedType.LPWStr)] string pwzMimeProposed,
        int dwMimeFlags,
        out IntPtr ppwzMimeOut,
        int dwReserved);

    /// <summary>
    /// Resolves a MIME type from a file extension using the built-in table first and the
    /// registry second.
    /// </summary>
    /// <param name="fileExtension">Extension with or without a leading dot; case-insensitive.</param>
    /// <returns>The MIME type, or <c>null</c> when neither source knows the extension.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="fileExtension"/> is null or empty.</exception>
    public string GetMimeTypeFromFileExtension(string fileExtension)
    {
        if (string.IsNullOrEmpty(fileExtension))
        {
            throw new ArgumentNullException("fileExtension");
        }

        string mimeType = GetMimeTypeFromList(fileExtension);

        if (String.IsNullOrEmpty(mimeType))
        {
            mimeType = GetMimeTypeFromRegistry(fileExtension);
        }

        return mimeType;
    }

    /// <summary>
    /// Resolves the MIME type of an existing file: by extension (table, then registry) and,
    /// if that yields nothing, by sniffing the file content.
    /// </summary>
    /// <param name="filePath">Path of the file to inspect.</param>
    /// <returns>The MIME type, or <c>null</c> when it could not be determined.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null or empty.</exception>
    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
    public string GetMimeTypeFromFile(string filePath)
    {
        if (string.IsNullOrEmpty(filePath))
        {
            throw new ArgumentNullException("filePath");
        }

        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException("File not found :", filePath);
        }

        // No ToLower(): it is culture-sensitive (e.g. Turkish 'I'), and both the table and
        // the registry are looked up case-insensitively.
        string extension = Path.GetExtension(filePath);

        string mimeType = GetMimeTypeFromList(extension);

        if (String.IsNullOrEmpty(mimeType))
        {
            mimeType = GetMimeTypeFromRegistry(extension);

            if (String.IsNullOrEmpty(mimeType))
            {
                mimeType = GetMimeTypeFromFileInternal(filePath);
            }
        }

        return mimeType;
    }

    // Looks the extension up in the built-in table; returns null when it is unknown.
    private string GetMimeTypeFromList(string fileExtension)
    {
        if (String.IsNullOrEmpty(fileExtension))
        {
            return null;
        }

        string key = fileExtension.TrimStart('.');
        string mimeType;

        if (key.Length > 0 && _mimeTypes.TryGetValue(key, out mimeType))
        {
            return mimeType;
        }

        return null;
    }

    // Reads the "Content Type" value of HKEY_CLASSES_ROOT\.<ext>; returns null when the
    // key or value is missing or the registry cannot be accessed.
    private string GetMimeTypeFromRegistry(string fileExtension)
    {
        if (String.IsNullOrEmpty(fileExtension))
        {
            // An empty sub-key name would open the HKEY_CLASSES_ROOT root itself.
            return null;
        }

        // Extension keys in the registry carry a leading dot.
        string subKeyName = fileExtension.StartsWith(".", StringComparison.Ordinal)
            ? fileExtension
            : "." + fileExtension;

        string mimeType = null;
        try
        {
            using (RegistryKey key = Registry.ClassesRoot.OpenSubKey(subKeyName))
            {
                object contentType = key == null ? null : key.GetValue("Content Type");

                if (contentType != null)
                {
                    mimeType = contentType.ToString();
                }
            }
        }
        catch (Exception)
        {
            // Deliberately best-effort: when this code is running in limited mode
            // accessing the registry is not allowed, and the caller falls back further.
        }

        return mimeType;
    }

    // Sniffs the first bytes of the file with FindMimeFromData. Returns null when the file
    // is missing or empty, when sniffing fails, or when the result is one of the generic
    // fallbacks ("text/plain", "application/octet-stream").
    private string GetMimeTypeFromFileInternal(string filePath)
    {
        if (!File.Exists(filePath))
        {
            return null;
        }

        byte[] byteBuffer = new byte[SniffBufferSize];
        int bytesRead = 0;

        using (FileStream fileStream = _fileRepository.Get(filePath))
        {
            // A single Read call may return fewer bytes than requested, so loop until the
            // buffer is full or the stream ends.
            int read;
            while (bytesRead < SniffBufferSize &&
                   (read = fileStream.Read(byteBuffer, bytesRead, SniffBufferSize - bytesRead)) > 0)
            {
                bytesRead += read;
            }
        }

        if (bytesRead == 0)
        {
            return null;
        }

        string mimeType = null;
        IntPtr mimeTypePtr = IntPtr.Zero;

        try
        {
            // Pass only the bytes actually read; zero padding would be sniffed as binary data.
            int hr = FindMimeFromData(IntPtr.Zero, null, byteBuffer, bytesRead, null, 0, out mimeTypePtr, 0);

            // Negative HRESULT values indicate failure.
            if (hr >= 0 && mimeTypePtr != IntPtr.Zero)
            {
                string mimeTypeFromFile = Marshal.PtrToStringUni(mimeTypePtr);

                if (!String.IsNullOrEmpty(mimeTypeFromFile) && mimeTypeFromFile != "text/plain" && mimeTypeFromFile != "application/octet-stream")
                {
                    mimeType = mimeTypeFromFile;
                }
            }
        }
        catch (Exception)
        {
            // Deliberately best-effort: urlmon.dll may be unavailable or unmanaged calls
            // may be forbidden on this host; report "unknown" by returning null.
        }
        finally
        {
            // The returned string is released with FreeCoTaskMem on every path.
            if (mimeTypePtr != IntPtr.Zero)
            {
                Marshal.FreeCoTaskMem(mimeTypePtr);
            }
        }

        return mimeType;
    }

    // Extension (without dot) -> MIME type. Shared by all instances and never mutated;
    // keys compare case-insensitively so "PDF" and "pdf" resolve alike.
    private static readonly Dictionary<string, string> _mimeTypes = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        {
            {"ai","application/postscript"},
            {"aif","audio/x-aiff"},
            {"aifc","audio/x-aiff"},
            {"aiff","audio/x-aiff"},
            {"asc","text/plain"},
            {"atom","application/atom+xml"},
            {"au","audio/basic"},
            {"avi","video/x-msvideo"},
            {"bcpio","application/x-bcpio"},
            {"bin","application/octet-stream"},
            {"bmp","image/bmp"},
            {"cdf","application/x-netcdf"},
            {"cgm","image/cgm"},
            {"class","application/octet-stream"},
            {"cpio","application/x-cpio"},
            {"cpt","application/mac-compactpro"},
            {"csh","application/x-csh"},
            {"css","text/css"},
            {"dcr","application/x-director"},
            {"dif","video/x-dv"},
            {"dir","application/x-director"},
            {"djv","image/vnd.djvu"},
            {"djvu","image/vnd.djvu"},
            {"dll","application/octet-stream"},
            {"dmg","application/octet-stream"},
            {"dms","application/octet-stream"},
            {"doc","application/msword"},
            {"docx","application/vnd.openxmlformats-officedocument.wordprocessingml.document"},
            {"dotx","application/vnd.openxmlformats-officedocument.wordprocessingml.template"},
            {"docm","application/vnd.ms-word.document.macroEnabled.12"},
            {"dotm","application/vnd.ms-word.template.macroEnabled.12"},
            {"dtd","application/xml-dtd"},
            {"dv","video/x-dv"},
            {"dvi","application/x-dvi"},
            {"dxr","application/x-director"},
            {"eps","application/postscript"},
            {"etx","text/x-setext"},
            {"exe","application/octet-stream"},
            {"ez","application/andrew-inset"},
            {"gif","image/gif"},
            {"gram","application/srgs"},
            {"grxml","application/srgs+xml"},
            {"gtar","application/x-gtar"},
            {"hdf","application/x-hdf"},
            {"hqx","application/mac-binhex40"},
            {"htc","text/x-component"},
            {"htm","text/html"},
            {"html","text/html"},
            {"ice","x-conference/x-cooltalk"},
            {"ico","image/x-icon"},
            {"ics","text/calendar"},
            {"ief","image/ief"},
            {"ifb","text/calendar"},
            {"iges","model/iges"},
            {"igs","model/iges"},
            {"jnlp","application/x-java-jnlp-file"},
            {"jp2","image/jp2"},
            {"jpe","image/jpeg"},
            {"jpeg","image/jpeg"},
            {"jpg","image/jpeg"},
            {"js","application/x-javascript"},
            {"kar","audio/midi"},
            {"latex","application/x-latex"},
            {"lha","application/octet-stream"},
            {"lzh","application/octet-stream"},
            {"m3u","audio/x-mpegurl"},
            {"m4a","audio/mp4a-latm"},
            {"m4b","audio/mp4a-latm"},
            {"m4p","audio/mp4a-latm"},
            {"m4u","video/vnd.mpegurl"},
            {"m4v","video/x-m4v"},
            {"mac","image/x-macpaint"},
            {"man","application/x-troff-man"},
            {"mathml","application/mathml+xml"},
            {"me","application/x-troff-me"},
            {"mesh","model/mesh"},
            {"mid","audio/midi"},
            {"midi","audio/midi"},
            {"mif","application/vnd.mif"},
            {"mov","video/quicktime"},
            {"movie","video/x-sgi-movie"},
            {"mp2","audio/mpeg"},
            {"mp3","audio/mpeg"},
            {"mp4","video/mp4"},
            {"mpe","video/mpeg"},
            {"mpeg","video/mpeg"},
            {"mpg","video/mpeg"},
            {"mpga","audio/mpeg"},
            {"ms","application/x-troff-ms"},
            {"msh","model/mesh"},
            {"mxu","video/vnd.mpegurl"},
            {"nc","application/x-netcdf"},
            {"oda","application/oda"},
            {"ogg","application/ogg"},
            {"pbm","image/x-portable-bitmap"},
            {"pct","image/pict"},
            {"pdb","chemical/x-pdb"},
            {"pdf","application/pdf"},
            {"pgm","image/x-portable-graymap"},
            {"pgn","application/x-chess-pgn"},
            {"pic","image/pict"},
            {"pict","image/pict"},
            {"png","image/png"},
            {"pnm","image/x-portable-anymap"},
            {"pnt","image/x-macpaint"},
            {"pntg","image/x-macpaint"},
            {"ppm","image/x-portable-pixmap"},
            {"ppt","application/vnd.ms-powerpoint"},
            {"pptx","application/vnd.openxmlformats-officedocument.presentationml.presentation"},
            {"potx","application/vnd.openxmlformats-officedocument.presentationml.template"},
            {"ppsx","application/vnd.openxmlformats-officedocument.presentationml.slideshow"},
            {"ppam","application/vnd.ms-powerpoint.addin.macroEnabled.12"},
            {"pptm","application/vnd.ms-powerpoint.presentation.macroEnabled.12"},
            {"potm","application/vnd.ms-powerpoint.template.macroEnabled.12"},
            {"ppsm","application/vnd.ms-powerpoint.slideshow.macroEnabled.12"},
            {"ps","application/postscript"},
            {"qt","video/quicktime"},
            {"qti","image/x-quicktime"},
            {"qtif","image/x-quicktime"},
            {"ra","audio/x-pn-realaudio"},
            {"ram","audio/x-pn-realaudio"},
            {"ras","image/x-cmu-raster"},
            {"rdf","application/rdf+xml"},
            {"rgb","image/x-rgb"},
            {"rm","application/vnd.rn-realmedia"},
            {"roff","application/x-troff"},
            {"rtf","text/rtf"},
            {"rtx","text/richtext"},
            {"sgm","text/sgml"},
            {"sgml","text/sgml"},
            {"sh","application/x-sh"},
            {"shar","application/x-shar"},
            {"silo","model/mesh"},
            {"sit","application/x-stuffit"},
            {"skd","application/x-koan"},
            {"skm","application/x-koan"},
            {"skp","application/x-koan"},
            {"skt","application/x-koan"},
            {"smi","application/smil"},
            {"smil","application/smil"},
            {"snd","audio/basic"},
            {"so","application/octet-stream"},
            {"spl","application/x-futuresplash"},
            {"src","application/x-wais-source"},
            {"sv4cpio","application/x-sv4cpio"},
            {"sv4crc","application/x-sv4crc"},
            {"svg","image/svg+xml"},
            {"swf","application/x-shockwave-flash"},
            {"t","application/x-troff"},
            {"tar","application/x-tar"},
            {"tcl","application/x-tcl"},
            {"tex","application/x-tex"},
            {"texi","application/x-texinfo"},
            {"texinfo","application/x-texinfo"},
            {"tif","image/tiff"},
            {"tiff","image/tiff"},
            {"tr","application/x-troff"},
            {"tsv","text/tab-separated-values"},
            {"txt","text/plain"},
            {"ustar","application/x-ustar"},
            {"vcd","application/x-cdlink"},
            {"vrml","model/vrml"},
            {"vxml","application/voicexml+xml"},
            {"wav","audio/x-wav"},
            {"wbmp","image/vnd.wap.wbmp"},
            // "wbmxl" is a misspelling kept for backward compatibility; "wbxml" is the real extension.
            {"wbmxl","application/vnd.wap.wbxml"},
            {"wbxml","application/vnd.wap.wbxml"},
            {"wml","text/vnd.wap.wml"},
            {"wmlc","application/vnd.wap.wmlc"},
            {"wmls","text/vnd.wap.wmlscript"},
            {"wmlsc","application/vnd.wap.wmlscriptc"},
            {"wrl","model/vrml"},
            {"xbm","image/x-xbitmap"},
            {"xht","application/xhtml+xml"},
            {"xhtml","application/xhtml+xml"},
            {"xls","application/vnd.ms-excel"},
            {"xml","application/xml"},
            {"xpm","image/x-xpixmap"},
            {"xsl","application/xml"},
            {"xlsx","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"},
            {"xltx","application/vnd.openxmlformats-officedocument.spreadsheetml.template"},
            {"xlsm","application/vnd.ms-excel.sheet.macroEnabled.12"},
            {"xltm","application/vnd.ms-excel.template.macroEnabled.12"},
            {"xlam","application/vnd.ms-excel.addin.macroEnabled.12"},
            {"xlsb","application/vnd.ms-excel.sheet.binary.macroEnabled.12"},
            {"xslt","application/xslt+xml"},
            {"xul","application/vnd.mozilla.xul+xml"},
            {"xwd","image/x-xwindowdump"},
            {"xyz","chemical/x-xyz"},
            {"zip","application/zip"}
        };
}

我最终使用了Netomatix的Winista MimeDetector。创建帐户后,可以免费下载源:http://www.netomatix.com/Products/DocumentManagement/MimeDetector.aspx

1
2
3
4
5
6
7
8
9
10
11
// Load the MIME type definitions from the XML file.
MimeTypes g_MimeTypes = new MimeTypes("mime-types.xml");

// File.ReadAllBytes opens the file for reading only and returns its complete content.
// A single Stream.Read call is allowed to return fewer bytes than requested, and
// FileStream(path, FileMode.Open) asks for read/write access, which fails on read-only files.
byte[] data = System.IO.File.ReadAllBytes(strFile);
sbyte[] fileData = Winista.Mime.SupportUtil.ToSByteArray(data);

MimeType oMimeType = g_MimeTypes.GetMimeType(fileData);

这是对另一个问题的回答的一部分:《Urlmon.dll中FindMimeFromData方法的替代方案(支持更多MIME类型)》。
我认为这是此问题的最佳解决方案。


我发现了运行此代码的几个问题:

1
2
// Problematic call quoted from the earlier answer: the out argument is a 32-bit UInt32,
// yet the value is later turned into an IntPtr and read as a string pointer.
// NOTE(review): presumably the pointer is truncated in a 64-bit process — confirm.
UInt32 mimetype;
FindMimeFromData(0, null, buffer, 256, null, 0, out mimetype, 0);

如果您尝试在x64 / Win10上运行它,将会得到

1
2
AccessViolationException"Attempted to read or write protected memory.
This is often an indication that other memory is corrupt"

感谢这篇文章《PtrToStringUni在Windows 10中不起作用》以及@xanatos。

我修改了解决方案以在x64和.NET Core 2.1下运行:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
   [DllImport("urlmon.dll", CharSet = CharSet.Unicode, ExactSpelling = true,
    SetLastError = false)]
    static extern int FindMimeFromData(IntPtr pBC,
        [MarshalAs(UnmanagedType.LPWStr)] string pwzUrl,
        [MarshalAs(UnmanagedType.LPArray, ArraySubType=UnmanagedType.I1,
        SizeParamIndex=3)]
        byte[] pBuffer,
        int cbSize,
        [MarshalAs(UnmanagedType.LPWStr)] string pwzMimeProposed,
        int dwMimeFlags,
        out IntPtr ppwzMimeOut,
        int dwReserved);

   string getMimeFromFile(byte[] fileSource)
   {
            byte[] buffer = new byte[256];
            using (Stream stream = new MemoryStream(fileSource))
            {
                if (stream.Length >= 256)
                    stream.Read(buffer, 0, 256);
                else
                    stream.Read(buffer, 0, (int)stream.Length);
            }

            try
            {
                IntPtr mimeTypePtr;
                FindMimeFromData(IntPtr.Zero, null, buffer, buffer.Length,
                    null, 0, out mimeTypePtr, 0);

                string mime = Marshal.PtrToStringUni(mimeTypePtr);
                Marshal.FreeCoTaskMem(mimeTypePtr);
                return mime;
            }
            catch (Exception ex)
            {
                return"unknown/unknown";
            }
   }

谢谢


您好,我已经将Winista.MimeDetect项目移植到.NET Core / .NET Framework,并以urlmon.dll作为回退方案。

1
2
3
4
5
   //init
   var mimeTypes = new MimeTypes();

   //usage by filepath
   var mimeType1 = mimeTypes.GetMimeTypeFromFile(filePath);


推荐阅读