using System.Diagnostics.CodeAnalysis;
using System.Text;

namespace MagicFileEncoding;

/// <summary>
/// <para>List of additional encodings</para>
/// Encoding will be <i>null</i> if required codepage can't be retrieved
/// </summary>
/// <remarks>
/// Every field is a <c>static readonly</c> value produced by a single lookup by name in its initializer.
/// Callers must null-check a field before using it, because a failed lookup is stored as <i>null</i>
/// instead of being reported as an exception.
/// </remarks>
[SuppressMessage("ReSharper", "InconsistentNaming")]
public static class AdditionalEncoding
{
    /// <summary>
    /// <para>(Latin-1)</para>
    /// This character set contains the script-specific characters for Western European and American languages.
    /// The character set covers Albanian, Catalan, Danish, Dutch, English, Faroese, Finnish, French, Galician,
    /// Icelandic, German, Italian, Norwegian, Portuguese, Spanish and Swedish. Only single characters like the
    /// Dutch "ij" or the German quotation marks below are missing.
    /// </summary>
    public static readonly Encoding? ISO_8859_1 = SoftFetchEncoding("iso-8859-1");

    /// <summary>
    /// <para>(Latin-2)</para>
    /// This character set contains the script-specific characters for most Central European and Slavic languages.
    /// The character set covers Croatian, Polish, Romanian, Slovak, Slovenian, Czech and Hungarian.
    /// </summary>
    public static readonly Encoding? ISO_8859_2 = SoftFetchEncoding("iso-8859-2");

    /// <summary>
    /// <para>(Latin-3)</para>
    /// This character set covers the languages Esperanto, Galician, Maltese and Turkish.
    /// </summary>
    public static readonly Encoding? ISO_8859_3 = SoftFetchEncoding("iso-8859-3");

    /// <summary>
    /// <para>(Latin-4)</para>
    /// This character set contains some characters of Estonian, Latvian and Lithuanian languages.
    /// Compare this character set also with ISO 8859-10, which is very similar.
    /// </summary>
    public static readonly Encoding? ISO_8859_4 = SoftFetchEncoding("iso-8859-4");

    /// <summary>
    /// This character set contains Cyrillic characters.
    /// It largely covers the Bulgarian, Macedonian, Russian, Serbian and Ukrainian languages.
    /// </summary>
    public static readonly Encoding? ISO_8859_5 = SoftFetchEncoding("iso-8859-5");

    /// <summary>
    /// This character set contains characters of Arabic script. However, the representation of the characters
    /// in the following table is "abstract" because the characters vary in writing practice depending on whether
    /// they are at the beginning, middle, or end of a word, or individually.
    /// Arabic is further characterized by the fact that the direction of writing is from right to left.
    /// </summary>
    public static readonly Encoding? ISO_8859_6 = SoftFetchEncoding("iso-8859-6");

    /// <summary>
    /// This character set contains the characters of the Modern Greek script.
    /// </summary>
    public static readonly Encoding? ISO_8859_7 = SoftFetchEncoding("iso-8859-7");

    /// <summary>
    /// This character set contains the characters of the Hebrew script.
    /// As with the Arabic script, the direction of writing is from right to left.
    /// </summary>
    public static readonly Encoding? ISO_8859_8 = SoftFetchEncoding("iso-8859-8");

    /// <summary>
    /// <para>(Latin-5)</para>
    /// This character set is specially designed for Turkish. It is based on ISO 8859-1,
    /// but contains Turkish characters instead of the Icelandic special characters.
    /// </summary>
    public static readonly Encoding? ISO_8859_9 = SoftFetchEncoding("iso-8859-9");

    /// <summary>
    /// <para>(Latin-6)</para>
    /// This character set specifically contains characters for
    /// the Greenlandic (Inuit) and Lappish (Sami) languages.
    /// </summary>
    public static readonly Encoding? ISO_8859_10 = SoftFetchEncoding("iso-8859-10");

    /// <summary>
    /// UTF-32 always encodes a character in exactly 32 bits and is thus the simplest, since no variable character
    /// length is used and no intelligent algorithm is required, but at the expense of memory size:
    /// if only characters of the ASCII character set are used, more than four times as much memory is required
    /// as with encoding in ASCII (7 bits required). Depending on the sequence of the bytes, whether the least
    /// significant byte or the most significant byte is transmitted first,
    /// one speaks of Little Endian (UTF-32LE) or <b>Big Endian (UTF-32BE)</b>.
    /// </summary>
    public static readonly Encoding? UTF32BE = SoftFetchEncoding("utf-32BE");

    /// <summary>
    /// Get the requested encoding and consume the exception if it can't be found in the code pages
    /// </summary>
    /// <param name="encoding">The encoding name, passed unchanged to <see cref="Encoding.GetEncoding(string)"/></param>
    /// <returns>The encoding object, or <i>null</i> when the lookup throws</returns>
    private static Encoding? SoftFetchEncoding(string encoding)
    {
        try
        {
            return Encoding.GetEncoding(encoding);
        }
        catch
        {
            // Deliberate catch-all: this method only runs from the static field initializers above,
            // so a lookup failure must become null rather than an exception escaping type initialization.
            // NOTE(review): some of these names may only resolve when a code-page encoding provider
            // is registered on the running platform - confirm where (or whether) the project does that.
            return null;
        }
    }
}