Added encoding Windows cp1251

This commit is contained in:
Alexandr Belousov 2017-08-08 14:21:24 +06:00
parent bdcbb419e9
commit d7908aaf95

View file

@ -8,7 +8,8 @@ for arbitrary _encoding and decoding of characters, arbitrary transcoding
between strings of different type, as well as validation and sanitization. between strings of different type, as well as validation and sanitization.
Encodings currently supported are UTF-8, UTF-16, UTF-32, ASCII, ISO-8859-1 Encodings currently supported are UTF-8, UTF-16, UTF-32, ASCII, ISO-8859-1
(also known as LATIN-1), ISO-8859-2 (LATIN-2), WINDOWS-1250 and WINDOWS-1252. (also known as LATIN-1), ISO-8859-2 (LATIN-2), WINDOWS-1250, WINDOWS-1251
and WINDOWS-1252.
$(SCRIPT inhibitQuickIndex = 1;) $(SCRIPT inhibitQuickIndex = 1;)
$(BOOKTABLE, $(BOOKTABLE,
@ -53,6 +54,7 @@ $(TR $(TD Encoding schemes) $(TD
$(LREF EncodingSchemeUtf32Native) $(LREF EncodingSchemeUtf32Native)
$(LREF EncodingSchemeUtf8) $(LREF EncodingSchemeUtf8)
$(LREF EncodingSchemeWindows1250) $(LREF EncodingSchemeWindows1250)
$(LREF EncodingSchemeWindows1251)
$(LREF EncodingSchemeWindows1252) $(LREF EncodingSchemeWindows1252)
)) ))
$(TR $(TD Representation) $(TD $(TR $(TD Representation) $(TD
@ -64,6 +66,8 @@ $(TR $(TD Representation) $(TD
$(LREF Latin2String) $(LREF Latin2String)
$(LREF Windows1250Char) $(LREF Windows1250Char)
$(LREF Windows1250String) $(LREF Windows1250String)
$(LREF Windows1251Char)
$(LREF Windows1251String)
$(LREF Windows1252Char) $(LREF Windows1252Char)
$(LREF Windows1252String) $(LREF Windows1252String)
)) ))
@ -84,8 +88,8 @@ auto e = EncodingScheme.create("utf-8");
This library supplies $(LREF EncodingScheme) subclasses for ASCII, This library supplies $(LREF EncodingScheme) subclasses for ASCII,
ISO-8859-1 (also known as LATIN-1), ISO-8859-2 (LATIN-2), WINDOWS-1250, ISO-8859-1 (also known as LATIN-1), ISO-8859-2 (LATIN-2), WINDOWS-1250,
WINDOWS-1252, UTF-8, and (on little-endian architectures) UTF-16LE and WINDOWS-1251, WINDOWS-1252, UTF-8, and (on little-endian architectures)
UTF-32LE; or (on big-endian architectures) UTF-16BE and UTF-32BE. UTF-16LE and UTF-32LE; or (on big-endian architectures) UTF-16BE and UTF-32BE.
This library provides a mechanism whereby other modules may add $(LREF This library provides a mechanism whereby other modules may add $(LREF
EncodingScheme) subclasses for any other _encoding. EncodingScheme) subclasses for any other _encoding.
@ -395,6 +399,10 @@ import std.typecons;
Windows1250String y; Windows1250String y;
transcode(s,y); transcode(s,y);
assert(y == cast(Windows1250Char[])[0x8e, 'l', 'u', 0x9d, 'o', 'u', 0xe8, 'k', 0xfd, ' ', 'k', 0xf9, 0xf2]); assert(y == cast(Windows1250Char[])[0x8e, 'l', 'u', 0x9d, 'o', 'u', 0xe8, 'k', 0xfd, ' ', 'k', 0xf9, 0xf2]);
s = "\u0402lu\u0403ou\u201D\u045C k\u0414\u044F";
Windows1251String s51;
transcode(s,s51);
assert(s51 == cast(Windows1251Char[])[0x80, 'l', 'u', 0x81, 'o', 'u', 0x94, 0x9d, ' ', 'k', 0xc4, 0xff]);
} }
// Make sure we can count properly // Make sure we can count properly
@ -1114,6 +1122,101 @@ private template EncoderInstance(CharType : Windows1250Char)
mixin GenericEncoder!(); mixin GenericEncoder!();
} }
//=============================================================================
// WINDOWS-1251
//=============================================================================
/// Defines a Windows1251-encoded character (one code unit, stored as a $(D ubyte)).
enum Windows1251Char : ubyte { init }
/**
 * Defines a Windows1251-encoded string (as an array of $(D
 * immutable(Windows1251Char))).
 */
alias Windows1251String = immutable(Windows1251Char)[];
// Encoder specialisation for WINDOWS-1251. It only supplies the encoding
// name and the lookup tables; the actual encode/decode routines come from
// the GenericEncoder mixin at the end of the template.
private template EncoderInstance(CharType : Windows1251Char)
{
import std.typecons : Tuple, tuple;
alias E = Windows1251Char;
alias EString = Windows1251String;
// Canonical name reported for this encoding.
@property string encodingName() @safe pure nothrow @nogc
{
return "windows-1251";
}
// Inclusive range of code units covered by charMap (0x80 .. 0xff).
// NOTE(review): code units below 0x80 are presumably passed through
// unchanged by GenericEncoder -- confirm against the mixin.
private static immutable dchar m_charMapStart = 0x80;
private static immutable dchar m_charMapEnd = 0xff;
// Decoding table: 16 rows of 8 entries, one entry per code unit in
// m_charMapStart .. m_charMapEnd, in ascending code-unit order (the first
// entry, U+0402, belongs to 0x80). The single U+FFFD entry sits at the
// position of 0x98, the one code unit this table leaves unmapped.
private immutable wstring charMap =
"\u0402\u0403\u201A\u0453\u201E\u2026\u2020\u2021"~
"\u20AC\u2030\u0409\u2039\u040A\u040C\u040B\u040F"~
"\u0452\u2018\u2019\u201C\u201D\u2022\u2013\u2014"~
"\uFFFD\u2122\u0459\u203A\u045A\u045C\u045B\u045F"~
"\u00A0\u040E\u045E\u0408\u00A4\u0490\u00A6\u00A7"~
"\u0401\u00A9\u0404\u00AB\u00AC\u00AD\u00AE\u0407"~
"\u00B0\u00B1\u0406\u0456\u0491\u00B5\u00B6\u00B7"~
"\u0451\u2116\u0454\u00BB\u0458\u0405\u0455\u0457"~
"\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417"~
"\u0418\u0419\u041A\u041B\u041C\u041D\u041E\u041F"~
"\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427"~
"\u0428\u0429\u042A\u042B\u042C\u042D\u042E\u042F"~
"\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437"~
"\u0438\u0439\u043A\u043B\u043C\u043D\u043E\u043F"~
"\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447"~
"\u0448\u0449\u044A\u044B\u044C\u044D\u044E\u044F";
// Encoding table: pairs of (Unicode code point, Windows-1251 byte).
// The entries are deliberately NOT sorted. They appear to be stored as an
// implicit binary search tree keyed on the code point (the children of
// entry i sit at 2*i+1 and 2*i+2: U+0432 is followed by U+0412 and U+0453,
// and so on), presumably so GenericEncoder can search it without extra
// storage -- confirm against the mixin. Do not reorder or re-sort this
// array; new entries require rebuilding the whole layout.
private immutable Tuple!(wchar, char)[] bstMap = [
tuple('\u0432','\xE2'),tuple('\u0412','\xC2'),tuple('\u0453','\x83'),
tuple('\u0401','\xA8'),tuple('\u0422','\xD2'),tuple('\u0442','\xF2'),
tuple('\u2018','\x91'),tuple('\u00AD','\xAD'),tuple('\u0409','\x8A'),
tuple('\u041A','\xCA'),tuple('\u042A','\xDA'),tuple('\u043A','\xEA'),
tuple('\u044A','\xFA'),tuple('\u045B','\x9E'),tuple('\u2022','\x95'),
tuple('\u00A7','\xA7'),tuple('\u00B5','\xB5'),tuple('\u0405','\xBD'),
tuple('\u040E','\xA1'),tuple('\u0416','\xC6'),tuple('\u041E','\xCE'),
tuple('\u0426','\xD6'),tuple('\u042E','\xDE'),tuple('\u0436','\xE6'),
tuple('\u043E','\xEE'),tuple('\u0446','\xF6'),tuple('\u044E','\xFE'),
tuple('\u0457','\xBF'),tuple('\u0490','\xA5'),tuple('\u201D','\x94'),
tuple('\u203A','\x9B'),tuple('\u00A4','\xA4'),tuple('\u00AB','\xAB'),
tuple('\u00B0','\xB0'),tuple('\u00B7','\xB7'),tuple('\u0403','\x81'),
tuple('\u0407','\xAF'),tuple('\u040B','\x8E'),tuple('\u0410','\xC0'),
tuple('\u0414','\xC4'),tuple('\u0418','\xC8'),tuple('\u041C','\xCC'),
tuple('\u0420','\xD0'),tuple('\u0424','\xD4'),tuple('\u0428','\xD8'),
tuple('\u042C','\xDC'),tuple('\u0430','\xE0'),tuple('\u0434','\xE4'),
tuple('\u0438','\xE8'),tuple('\u043C','\xEC'),tuple('\u0440','\xF0'),
tuple('\u0444','\xF4'),tuple('\u0448','\xF8'),tuple('\u044C','\xFC'),
tuple('\u0451','\xB8'),tuple('\u0455','\xBE'),tuple('\u0459','\x9A'),
tuple('\u045E','\xA2'),tuple('\u2013','\x96'),tuple('\u201A','\x82'),
tuple('\u2020','\x86'),tuple('\u2030','\x89'),tuple('\u2116','\xB9'),
tuple('\u00A0','\xA0'),tuple('\u00A6','\xA6'),tuple('\u00A9','\xA9'),
tuple('\u00AC','\xAC'),tuple('\u00AE','\xAE'),tuple('\u00B1','\xB1'),
tuple('\u00B6','\xB6'),tuple('\u00BB','\xBB'),tuple('\u0402','\x80'),
tuple('\u0404','\xAA'),tuple('\u0406','\xB2'),tuple('\u0408','\xA3'),
tuple('\u040A','\x8C'),tuple('\u040C','\x8D'),tuple('\u040F','\x8F'),
tuple('\u0411','\xC1'),tuple('\u0413','\xC3'),tuple('\u0415','\xC5'),
tuple('\u0417','\xC7'),tuple('\u0419','\xC9'),tuple('\u041B','\xCB'),
tuple('\u041D','\xCD'),tuple('\u041F','\xCF'),tuple('\u0421','\xD1'),
tuple('\u0423','\xD3'),tuple('\u0425','\xD5'),tuple('\u0427','\xD7'),
tuple('\u0429','\xD9'),tuple('\u042B','\xDB'),tuple('\u042D','\xDD'),
tuple('\u042F','\xDF'),tuple('\u0431','\xE1'),tuple('\u0433','\xE3'),
tuple('\u0435','\xE5'),tuple('\u0437','\xE7'),tuple('\u0439','\xE9'),
tuple('\u043B','\xEB'),tuple('\u043D','\xED'),tuple('\u043F','\xEF'),
tuple('\u0441','\xF1'),tuple('\u0443','\xF3'),tuple('\u0445','\xF5'),
tuple('\u0447','\xF7'),tuple('\u0449','\xF9'),tuple('\u044B','\xFB'),
tuple('\u044D','\xFD'),tuple('\u044F','\xFF'),tuple('\u0452','\x90'),
tuple('\u0454','\xBA'),tuple('\u0456','\xB3'),tuple('\u0458','\xBC'),
tuple('\u045A','\x9C'),tuple('\u045C','\x9D'),tuple('\u045F','\x9F'),
tuple('\u0491','\xB4'),tuple('\u2014','\x97'),tuple('\u2019','\x92'),
tuple('\u201C','\x93'),tuple('\u201E','\x84'),tuple('\u2021','\x87'),
tuple('\u2026','\x85'),tuple('\u2039','\x8B'),tuple('\u20AC','\x88'),
tuple('\u2122','\x99')
];
// Pulls in the shared table-driven implementation, which reads the
// declarations above.
mixin GenericEncoder!();
}
//============================================================================= //=============================================================================
// WINDOWS-1252 // WINDOWS-1252
//============================================================================= //=============================================================================
@ -1508,7 +1611,7 @@ Returns true if c is a valid code point
This function supersedes $(D std.utf.startsValidDchar()). This function supersedes $(D std.utf.startsValidDchar()).
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
c = the code point to be tested c = the code point to be tested
@ -1525,7 +1628,7 @@ bool isValidCodePoint(dchar c) @safe pure nothrow @nogc
explicitly specify the encoding type. explicitly specify the encoding type.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
*/ */
@property string encodingName(T)() @property string encodingName(T)()
{ {
@ -1542,6 +1645,7 @@ bool isValidCodePoint(dchar c) @safe pure nothrow @nogc
assert(encodingName!(Latin1Char) == "ISO-8859-1"); assert(encodingName!(Latin1Char) == "ISO-8859-1");
assert(encodingName!(Latin2Char) == "ISO-8859-2"); assert(encodingName!(Latin2Char) == "ISO-8859-2");
assert(encodingName!(Windows1250Char) == "windows-1250"); assert(encodingName!(Windows1250Char) == "windows-1250");
assert(encodingName!(Windows1251Char) == "windows-1251");
assert(encodingName!(Windows1252Char) == "windows-1252"); assert(encodingName!(Windows1252Char) == "windows-1252");
} }
@ -1553,7 +1657,7 @@ bool isValidCodePoint(dchar c) @safe pure nothrow @nogc
explicitly specify the encoding type. explicitly specify the encoding type.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
*/ */
bool canEncode(E)(dchar c) bool canEncode(E)(dchar c)
{ {
@ -1571,6 +1675,9 @@ bool canEncode(E)(dchar c)
assert( canEncode!(Windows1250Char)('\u20AC')); assert( canEncode!(Windows1250Char)('\u20AC'));
assert(!canEncode!(Windows1250Char)('\u20AD')); assert(!canEncode!(Windows1250Char)('\u20AD'));
assert(!canEncode!(Windows1250Char)('\uFFFD')); assert(!canEncode!(Windows1250Char)('\uFFFD'));
assert( canEncode!(Windows1251Char)('\u0402'));
assert(!canEncode!(Windows1251Char)('\u20AD'));
assert(!canEncode!(Windows1251Char)('\uFFFD'));
assert( canEncode!(Windows1252Char)('\u20AC')); assert( canEncode!(Windows1252Char)('\u20AC'));
assert(!canEncode!(Windows1252Char)('\u20AD')); assert(!canEncode!(Windows1252Char)('\u20AD'));
assert(!canEncode!(Windows1252Char)('\uFFFD')); assert(!canEncode!(Windows1252Char)('\uFFFD'));
@ -1595,7 +1702,7 @@ bool canEncode(E)(dchar c)
0x00 to 0x7F. 0x00 to 0x7F.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
c = the code unit to be tested c = the code unit to be tested
@ -1615,6 +1722,8 @@ bool isValidCodeUnit(E)(E c)
assert(!isValidCodeUnit(cast(AsciiChar) 0xA0)); assert(!isValidCodeUnit(cast(AsciiChar) 0xA0));
assert( isValidCodeUnit(cast(Windows1250Char) 0x80)); assert( isValidCodeUnit(cast(Windows1250Char) 0x80));
assert(!isValidCodeUnit(cast(Windows1250Char) 0x81)); assert(!isValidCodeUnit(cast(Windows1250Char) 0x81));
assert( isValidCodeUnit(cast(Windows1251Char) 0x80));
assert(!isValidCodeUnit(cast(Windows1251Char) 0x98));
assert( isValidCodeUnit(cast(Windows1252Char) 0x80)); assert( isValidCodeUnit(cast(Windows1252Char) 0x80));
assert(!isValidCodeUnit(cast(Windows1252Char) 0x81)); assert(!isValidCodeUnit(cast(Windows1252Char) 0x81));
} }
@ -1628,7 +1737,7 @@ bool isValidCodeUnit(E)(E c)
whereas the older function would throw an exception. whereas the older function would throw an exception.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
s = the string to be tested s = the string to be tested
@ -1650,7 +1759,7 @@ bool isValid(E)(const(E)[] s)
the first code unit, which is validly encoded. the first code unit, which is validly encoded.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
s = the string to be tested s = the string to be tested
@ -1679,7 +1788,7 @@ size_t validLength(E)(const(E)[] s)
replaced with '?'. replaced with '?'.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
s = the string to be sanitized s = the string to be sanitized
@ -1736,7 +1845,7 @@ immutable(E)[] sanitize(E)(immutable(E)[] s)
This is enforced by the function's in-contract. This is enforced by the function's in-contract.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
s = the string to be sliced s = the string to be sliced
@ -1769,7 +1878,7 @@ body
This is enforced by the function's in-contract. This is enforced by the function's in-contract.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
s = the string to be sliced s = the string to be sliced
@ -1804,7 +1913,7 @@ body
This function supersedes std.utf.toUTFindex(). This function supersedes std.utf.toUTFindex().
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
s = the string to be counted s = the string to be counted
@ -1844,7 +1953,7 @@ body
function codePoints() supersedes it more conveniently. function codePoints() supersedes it more conveniently.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
s = the string whose first code point is to be decoded s = the string whose first code point is to be decoded
@ -1871,7 +1980,7 @@ body
This is enforced by the function's in-contract. This is enforced by the function's in-contract.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
s = the string whose first code point is to be decoded s = the string whose first code point is to be decoded
@ -1898,7 +2007,7 @@ body
function will remove it, and return the value INVALID_SEQUENCE. function will remove it, and return the value INVALID_SEQUENCE.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
s = the string whose first code point is to be decoded s = the string whose first code point is to be decoded
@ -1923,7 +2032,7 @@ body
explicitly specify the encoding as a template parameter. explicitly specify the encoding as a template parameter.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
c = the code point to be encoded c = the code point to be encoded
@ -1955,7 +2064,7 @@ body
function codeUnits() supersedes it more conveniently. function codeUnits() supersedes it more conveniently.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
c = the code point to be encoded c = the code point to be encoded
@ -1988,7 +2097,7 @@ body
function codeUnits() supersedes it more conveniently. function codeUnits() supersedes it more conveniently.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
c = the code point to be encoded c = the code point to be encoded
@ -2097,7 +2206,7 @@ if (isNativeOutputRange!(R, E))
function codeUnits() supersedes it more conveniently. function codeUnits() supersedes it more conveniently.
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
c = the code point to be encoded c = the code point to be encoded
@ -2149,7 +2258,7 @@ size_t encode(Tgt, Src, R)(in Src[] s, R range)
This function supersedes std.utf.decode(). This function supersedes std.utf.decode().
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
s = the string to be decoded s = the string to be decoded
@ -2202,7 +2311,7 @@ body
This function supersedes std.utf.encode(). This function supersedes std.utf.encode().
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
c = the code point to be encoded c = the code point to be encoded
@ -2240,7 +2349,7 @@ body
(but note that to!() supersedes it more conveniently). (but note that to!() supersedes it more conveniently).
Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250, Standards: Unicode 5.0, ASCII, ISO-8859-1, ISO-8859-2, WINDOWS-1250,
WINDOWS-1252 WINDOWS-1251, WINDOWS-1252
Params: Params:
s = Source string. $(B Must) be validly encoded. s = Source string. $(B Must) be validly encoded.
@ -2322,7 +2431,7 @@ body
string asciiCharString = to!string(iota(0, 128, 1)); string asciiCharString = to!string(iota(0, 128, 1));
alias Types = AliasSeq!(string, Latin1String, Latin2String, AsciiString, alias Types = AliasSeq!(string, Latin1String, Latin2String, AsciiString,
Windows1250String, Windows1252String, dstring, wstring); Windows1250String, Windows1251String, Windows1252String, dstring, wstring);
foreach (S; Types) foreach (S; Types)
foreach (D; Types) foreach (D; Types)
{ {
@ -2466,6 +2575,7 @@ abstract class EncodingScheme
EncodingScheme.register!EncodingSchemeLatin1; EncodingScheme.register!EncodingSchemeLatin1;
EncodingScheme.register!EncodingSchemeLatin2; EncodingScheme.register!EncodingSchemeLatin2;
EncodingScheme.register!EncodingSchemeWindows1250; EncodingScheme.register!EncodingSchemeWindows1250;
EncodingScheme.register!EncodingSchemeWindows1251;
EncodingScheme.register!EncodingSchemeWindows1252; EncodingScheme.register!EncodingSchemeWindows1252;
EncodingScheme.register!EncodingSchemeUtf8; EncodingScheme.register!EncodingSchemeUtf8;
EncodingScheme.register!EncodingSchemeUtf16Native; EncodingScheme.register!EncodingSchemeUtf16Native;
@ -3055,6 +3165,74 @@ class EncodingSchemeWindows1250 : EncodingScheme
} }
} }
/**
 EncodingScheme implementation for the Windows-1251 code page.

 The only name this scheme answers to is:
 "windows-1251"
 */
class EncodingSchemeWindows1251 : EncodingScheme
{
    // No registration happens in this class: the former
    // `shared static this()` that called
    // EncodingScheme.register("std.encoding.EncodingSchemeWindows1251")
    // was moved to std.internal.phobosinit.

    /// Returns the list of names under which this scheme is known.
    override string[] names() const @safe pure nothrow
    {
        return ["windows-1251"];
    }

    /// Returns the canonical name of this scheme.
    override string toString() const @safe pure nothrow @nogc
    {
        return "windows-1251";
    }

    /// Reports whether code point `c` is representable in Windows-1251.
    override bool canEncode(dchar c) const @safe pure nothrow @nogc
    {
        return std.encoding.canEncode!(Windows1251Char)(c);
    }

    /// Returns how many code units are needed to encode `c`.
    override size_t encodedLength(dchar c) const @safe pure nothrow @nogc
    {
        return std.encoding.encodedLength!(Windows1251Char)(c);
    }

    /// Encodes `c` into `buffer` and returns the number of code units written.
    override size_t encode(dchar c, ubyte[] buffer) const @safe pure nothrow @nogc
    {
        // View the raw bytes as Windows-1251 code units for the generic encoder.
        auto target = cast(Windows1251Char[]) buffer;
        return std.encoding.encode(c, target);
    }

    /// Decodes one code point from the front of `s` and advances `s` past it.
    override dchar decode(ref const(ubyte)[] s) const @safe pure nothrow @nogc
    {
        auto remaining = cast(const(Windows1251Char)[]) s;
        immutable dchar decoded = std.encoding.decode(remaining);
        // Whatever the decoder left unread is the new tail of the input.
        s = s[$ - remaining.length .. $];
        return decoded;
    }

    /// Same as `decode`, but goes through `std.encoding.safeDecode`.
    override dchar safeDecode(ref const(ubyte)[] s) const @safe pure nothrow @nogc
    {
        auto remaining = cast(const(Windows1251Char)[]) s;
        immutable dchar decoded = std.encoding.safeDecode(remaining);
        // Whatever the decoder left unread is the new tail of the input.
        s = s[$ - remaining.length .. $];
        return decoded;
    }

    /// Byte sequence used in place of characters that cannot be encoded.
    override @property immutable(ubyte)[] replacementSequence() const @safe pure nothrow @nogc
    {
        return cast(immutable(ubyte)[]) "?";
    }
}
/** /**
EncodingScheme to handle Windows-1252 EncodingScheme to handle Windows-1252