// Copyright (c) 1999-2008 by Digital Mars
// All Rights Reserved
// written by Walter Bright
// http://www.digitalmars.com
// License for redistribution is by either the Artistic License
// in artistic.txt, or the GNU General Public License in gnu.txt.
// See the included readme.txt for details.

#include <ctype.h>
#include <string.h>

/*********************************************
 * Convert from named entity to its encoding.
 * For reference:
 *      http://www.htmlhelp.com/reference/html40/entities/
 *      http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html
 */

/* One table entry: an entity name (without '&' and ';') and the value
 * HtmlNamedEntity() returns for it.
 */
struct NameId
{
    const char *name;
    unsigned short value;
};

/*********************************************
 * Test whether the NUL-terminated table entry `name` is exactly the
 * `length` bytes starting at `p` (which need not be NUL-terminated).
 * The comparison is case sensitive.
 *
 * The length is checked first so that memcmp() never reads beyond the
 * end of a table string that is shorter than `length`, and so that a
 * table name is never matched by a mere prefix of it.
 *
 * Returns: nonzero on an exact match, 0 otherwise.
 */
static int entityNameEquals(const char *name, const unsigned char *p, size_t length)
{
    return strlen(name) == length && memcmp(name, p, length) == 0;
}

#if IN_GCC

/* Per-initial-letter tables. Each table holds the names whose first
 * letter (ignoring case) is the table's letter and is terminated by an
 * entry whose name is NULL.
 */

static const struct NameId namesA[] = {
    {"Aacgr", 0x0386}, {"aacgr", 0x03AC}, {"Aacute", 0x00C1}, {"aacute", 0x00E1},
    {"Abreve", 0x0102}, {"abreve", 0x0103}, {"Acirc", 0x00C2}, {"acirc", 0x00E2},
    {"acute", 0x00B4}, {"Acy", 0x0410}, {"acy", 0x0430}, {"AElig", 0x00C6},
    {"aelig", 0x00E6}, {"Agr", 0x0391}, {"agr", 0x03B1}, {"Agrave", 0x00C0},
    {"agrave", 0x00E0}, {"aleph", 0x2135}, {"alpha", 0x03B1}, {"Amacr", 0x0100},
    {"amacr", 0x0101}, {"amalg", 0x2210}, {"amp", 0x0026}, {"and", 0x2227},
    {"ang", 0x2220}, {"ang90", 0x221F}, {"angmsd", 0x2221}, {"angsph", 0x2222},
    {"angst", 0x212B}, {"Aogon", 0x0104}, {"aogon", 0x0105}, {"ap", 0x2248},
    {"ape", 0x224A}, {"apos", 0x0027}, {"Aring", 0x00C5}, {"aring", 0x00E5},
    {"ast", 0x002A}, {"asymp", 0x224D}, {"Atilde", 0x00C3}, {"atilde", 0x00E3},
    {"Auml", 0x00C4}, {"auml", 0x00E4},
    {NULL, 0}
};

static const struct NameId namesB[] = {
    {"barwed", 0x22BC}, {"Barwed", 0x2306}, {"bcong", 0x224C}, {"Bcy", 0x0411},
    {"bcy", 0x0431}, {"becaus", 0x2235}, {"bepsi", 0x220D}, {"bernou", 0x212C},
    {"beta", 0x03B2}, {"beth", 0x2136}, {"Bgr", 0x0392}, {"bgr", 0x03B2},
    {"blank", 0x2423}, {"blk12", 0x2592}, {"blk14", 0x2591}, {"blk34", 0x2593},
    {"block", 0x2588}, {"bottom", 0x22A5}, {"bowtie", 0x22C8}, {"boxdl", 0x2510},
    {"boxDL", 0x2555}, {"boxdL", 0x2556}, {"boxDl", 0x2557}, {"boxdr", 0x250C},
    {"boxDR", 0x2552}, {"boxDr", 0x2553}, {"boxdR", 0x2554}, {"boxh", 0x2500},
    {"boxH", 0x2550}, {"boxhd", 0x252C}, {"boxhD", 0x2564}, {"boxHD", 0x2565},
    {"boxHd", 0x2566}, {"boxhu", 0x2534}, {"boxhU", 0x2567}, {"boxHU", 0x2568},
    {"boxHu", 0x2569}, {"boxul", 0x2518}, {"boxUL", 0x255B}, {"boxUl", 0x255C},
    {"boxuL", 0x255D}, {"boxur", 0x2514}, {"boxUR", 0x2558}, {"boxuR", 0x2559},
    {"boxUr", 0x255A}, {"boxv", 0x2502}, {"boxV", 0x2551}, {"boxvh", 0x253C},
    {"boxvH", 0x256A}, {"boxVH", 0x256B}, {"boxVh", 0x256C}, {"boxvl", 0x2524},
    {"boxvL", 0x2561}, {"boxVL", 0x2562}, {"boxVl", 0x2563}, {"boxvr", 0x251C},
    {"boxvR", 0x255E}, {"boxVR", 0x255F}, {"boxVr", 0x2560}, {"bprime", 0x2035},
    {"breve", 0x02D8}, {"brvbar", 0x00A6}, {"bsim", 0x223D}, {"bsime", 0x22CD},
    {"bsol", 0x005C}, {"bull", 0x2022}, {"bump", 0x224E}, {"bumpe", 0x224F},
    {NULL, 0}
};

static const struct NameId namesC[] = {
    {"Cacute", 0x0106}, {"cacute", 0x0107}, {"cap", 0x2229}, {"Cap", 0x22D2},
    {"caret", 0x2041}, {"caron", 0x02C7}, {"Ccaron", 0x010C}, {"ccaron", 0x010D},
    {"Ccedil", 0x00C7}, {"ccedil", 0x00E7}, {"Ccirc", 0x0108}, {"ccirc", 0x0109},
    {"Cdot", 0x010A}, {"cdot", 0x010B}, {"cedil", 0x00B8}, {"cent", 0x00A2},
    {"CHcy", 0x0427}, {"chcy", 0x0447}, {"check", 0x2713}, {"chi", 0x03C7},
    {"cir", 0x25CB}, {"circ", 0x005E}, {"cire", 0x2257}, {"clubs", 0x2663},
    {"colon", 0x003A}, {"colone", 0x2254}, {"comma", 0x002C}, {"commat", 0x0040},
    {"comp", 0x2201}, {"compfn", 0x2218}, {"cong", 0x2245}, {"conint", 0x222E},
    {"coprod", 0x2210}, {"copy", 0x00A9}, {"copysr", 0x2117}, {"cross", 0x2717},
    {"cuepr", 0x22DE}, {"cuesc", 0x22DF}, {"cularr", 0x21B6}, {"cup", 0x222A},
    {"Cup", 0x22D3}, {"cupre", 0x227C}, {"curarr", 0x21B7}, {"curren", 0x00A4},
    {"cuvee", 0x22CE}, {"cuwed", 0x22CF},
    {NULL, 0}
};

static const struct NameId namesD[] = {
    {"dagger", 0x2020}, {"Dagger", 0x2021}, {"daleth", 0x2138}, {"darr", 0x2193},
    {"dArr", 0x21D3}, {"darr2", 0x21CA}, {"dash", 0x2010}, {"dashv", 0x22A3},
    {"dblac", 0x02DD}, {"Dcaron", 0x010E}, {"dcaron", 0x010F}, {"Dcy", 0x0414},
    {"dcy", 0x0434}, {"deg", 0x00B0}, {"Delta", 0x0394}, {"delta", 0x03B4},
    {"Dgr", 0x0394}, {"dgr", 0x03B4}, {"dharl", 0x21C3}, {"dharr", 0x21C2},
    {"diam", 0x22C4}, {"diams", 0x2666}, {"die", 0x00A8}, {"divide", 0x00F7},
    {"divonx", 0x22C7}, {"DJcy", 0x0402}, {"djcy", 0x0452}, {"dlarr", 0x2199},
    {"dlcorn", 0x231E}, {"dlcrop", 0x230D}, {"dollar", 0x0024}, {"Dot", 0x00A8},
    {"dot", 0x02D9}, {"DotDot", 0x20DC}, {"drarr", 0x2198}, {"drcorn", 0x231F},
    {"drcrop", 0x230C}, {"DScy", 0x0405}, {"dscy", 0x0455}, {"Dstrok", 0x0110},
    {"dstrok", 0x0111}, {"dtri", 0x25BF}, {"dtrif", 0x25BE}, {"DZcy", 0x040F},
    {"dzcy", 0x045F},
    {NULL, 0}
};

static const struct NameId namesE[] = {
    {"Eacgr", 0x0388}, {"eacgr", 0x03AD}, {"Eacute", 0x00C9}, {"eacute", 0x00E9},
    {"Ecaron", 0x011A}, {"ecaron", 0x011B}, {"ecir", 0x2256}, {"Ecirc", 0x00CA},
    {"ecirc", 0x00EA}, {"ecolon", 0x2255}, {"Ecy", 0x042D}, {"ecy", 0x044D},
    {"Edot", 0x0116}, {"edot", 0x0117}, {"eDot", 0x2251}, {"EEacgr", 0x0389},
    {"eeacgr", 0x03AE}, {"EEgr", 0x0397}, {"eegr", 0x03B7}, {"efDot", 0x2252},
    {"Egr", 0x0395}, {"egr", 0x03B5}, {"Egrave", 0x00C8}, {"egrave", 0x00E8},
    {"egs", 0x22DD}, {"ell", 0x2113}, {"els", 0x22DC}, {"Emacr", 0x0112},
    {"emacr", 0x0113}, {"empty", 0x2205}, {"emsp", 0x2003}, {"emsp13", 0x2004},
    {"emsp14", 0x2005}, {"ENG", 0x014A}, {"eng", 0x014B}, {"ensp", 0x2002},
    {"Eogon", 0x0118}, {"eogon", 0x0119}, {"epsi", 0x220A}, {"epsis", 0x220A},
    {"epsiv", 0x03B5}, {"equals", 0x003D}, {"equiv", 0x2261}, {"erDot", 0x2253},
    {"esdot", 0x2250}, {"eta", 0x03B7}, {"ETH", 0x00D0}, {"eth", 0x00F0},
    {"Euml", 0x00CB}, {"euml", 0x00EB}, {"excl", 0x0021}, {"exist", 0x2203},
    {NULL, 0}
};

static const struct NameId namesF[] = {
    {"Fcy", 0x0424}, {"fcy", 0x0444}, {"female", 0x2640}, {"ffilig", 0xFB03},
    {"fflig", 0xFB00}, {"ffllig", 0xFB04}, {"filig", 0xFB01}, {"flat", 0x266D},
    {"fllig", 0xFB02}, {"fnof", 0x0192}, {"forall", 0x2200}, {"fork", 0x22D4},
    {"frac12", 0x00BD}, {"frac13", 0x2153}, {"frac14", 0x00BC}, {"frac15", 0x2155},
    {"frac16", 0x2159}, {"frac18", 0x215B}, {"frac23", 0x2154}, {"frac25", 0x2156},
    {"frac34", 0x00BE}, {"frac35", 0x2157}, {"frac38", 0x215C}, {"frac45", 0x2158},
    {"frac56", 0x215A}, {"frac58", 0x215D}, {"frac78", 0x215E}, {"frown", 0x2322},
    {NULL, 0}
};

static const struct NameId namesG[] = {
    {"gacute", 0x01F5}, {"Gamma", 0x0393}, {"gamma", 0x03B3}, {"gammad", 0x03DC},
    {"gap", 0x2273}, {"Gbreve", 0x011E}, {"gbreve", 0x011F}, {"Gcedil", 0x0122},
    {"Gcirc", 0x011C}, {"gcirc", 0x011D}, {"Gcy", 0x0413}, {"gcy", 0x0433},
    {"Gdot", 0x0120}, {"gdot", 0x0121}, {"ge", 0x2265}, {"gE", 0x2267},
    {"gel", 0x22DB}, {"gEl", 0x22DB}, {"ges", 0x2265}, {"Gg", 0x22D9},
    {"Ggr", 0x0393}, {"ggr", 0x03B3}, {"gimel", 0x2137}, {"GJcy", 0x0403},
    {"gjcy", 0x0453}, {"gl", 0x2277}, {"gnap", 0xE411}, {"gne", 0x2269},
    {"gnE", 0x2269}, {"gnsim", 0x22E7}, {"grave", 0x0060}, {"gsdot", 0x22D7},
    {"gsim", 0x2273}, {"gt", 0x003E}, {"Gt", 0x226B}, {"gvnE", 0x2269},
    {NULL, 0}
};

static const struct NameId namesH[] = {
    {"hairsp", 0x200A}, {"half", 0x00BD}, {"hamilt", 0x210B}, {"HARDcy", 0x042A},
    {"hardcy", 0x044A}, {"harr", 0x2194}, {"hArr", 0x21D4}, {"harrw", 0x21AD},
    {"Hcirc", 0x0124}, {"hcirc", 0x0125}, {"hearts", 0x2665}, {"hellip", 0x2026},
    {"horbar", 0x2015}, {"Hstrok", 0x0126}, {"hstrok", 0x0127}, {"hybull", 0x2043},
    {"hyphen", 0x002D},
    {NULL, 0}
};

static const struct NameId namesI[] = {
    {"Iacgr", 0x038A}, {"iacgr", 0x03AF}, {"Iacute", 0x00CD}, {"iacute", 0x00ED},
    {"Icirc", 0x00CE}, {"icirc", 0x00EE}, {"Icy", 0x0418}, {"icy", 0x0438},
    {"idiagr", 0x0390}, {"Idigr", 0x03AA}, {"idigr", 0x03CA}, {"Idot", 0x0130},
    {"IEcy", 0x0415}, {"iecy", 0x0435}, {"iexcl", 0x00A1}, {"iff", 0x21D4},
    {"Igr", 0x0399}, {"igr", 0x03B9}, {"Igrave", 0x00CC}, {"igrave", 0x00EC},
    {"IJlig", 0x0132}, {"ijlig", 0x0133}, {"Imacr", 0x012A}, {"imacr", 0x012B},
    {"image", 0x2111}, {"incare", 0x2105}, {"infin", 0x221E}, {"inodot", 0x0131},
    {"int", 0x222B}, {"intcal", 0x22BA}, {"IOcy", 0x0401}, {"iocy", 0x0451},
    {"Iogon", 0x012E}, {"iogon", 0x012F}, {"iota", 0x03B9}, {"iquest", 0x00BF},
    {"isin", 0x220A}, {"Itilde", 0x0128}, {"itilde", 0x0129}, {"Iukcy", 0x0406},
    {"iukcy", 0x0456}, {"Iuml", 0x00CF}, {"iuml", 0x00EF},
    {NULL, 0}
};

static const struct NameId namesJ[] = {
    {"Jcirc", 0x0134}, {"jcirc", 0x0135}, {"Jcy", 0x0419}, {"jcy", 0x0439},
    {"Jsercy", 0x0408}, {"jsercy", 0x0458}, {"Jukcy", 0x0404}, {"jukcy", 0x0454},
    {NULL, 0}
};

static const struct NameId namesK[] = {
    {"kappa", 0x03BA}, {"kappav", 0x03F0}, {"Kcedil", 0x0136}, {"kcedil", 0x0137},
    {"Kcy", 0x041A}, {"kcy", 0x043A}, {"Kgr", 0x039A}, {"kgr", 0x03BA},
    {"kgreen", 0x0138}, {"KHcy", 0x0425}, {"khcy", 0x0445}, {"KHgr", 0x03A7},
    {"khgr", 0x03C7}, {"KJcy", 0x040C}, {"kjcy", 0x045C},
    {NULL, 0}
};

static const struct NameId namesL[] = {
    {"lAarr", 0x21DA}, {"Lacute", 0x0139}, {"lacute", 0x013A}, {"lagran", 0x2112},
    {"Lambda", 0x039B}, {"lambda", 0x03BB}, {"lang", 0x3008}, {"lap", 0x2272},
    {"laquo", 0x00AB}, {"larr", 0x2190}, {"Larr", 0x219E}, {"lArr", 0x21D0},
    {"larr2", 0x21C7}, {"larrhk", 0x21A9}, {"larrlp", 0x21AB}, {"larrtl", 0x21A2},
    {"Lcaron", 0x013D}, {"lcaron", 0x013E}, {"Lcedil", 0x013B}, {"lcedil", 0x013C},
    {"lceil", 0x2308}, {"lcub", 0x007B}, {"Lcy", 0x041B}, {"lcy", 0x043B},
    {"ldot", 0x22D6}, {"ldquo", 0x201C}, {"ldquor", 0x201E}, {"le", 0x2264},
    {"lE", 0x2266}, {"leg", 0x22DA}, {"lEg", 0x22DA}, {"les", 0x2264},
    {"lfloor", 0x230A}, {"lg", 0x2276}, {"Lgr", 0x039B}, {"lgr", 0x03BB},
    {"lhard", 0x21BD}, {"lharu", 0x21BC}, {"lhblk", 0x2584}, {"LJcy", 0x0409},
    {"ljcy", 0x0459}, {"Ll", 0x22D8}, {"Lmidot", 0x013F}, {"lmidot", 0x0140},
    {"lnap", 0xE2A2}, {"lne", 0x2268}, {"lnE", 0x2268}, {"lnsim", 0x22E6},
    {"lowast", 0x2217}, {"lowbar", 0x005F}, {"loz", 0x25CA}, {"lozf", 0x2726},
    {"lpar", 0x0028}, {"lrarr2", 0x21C6}, {"lrhar2", 0x21CB}, {"lsh", 0x21B0},
    {"lsim", 0x2272}, {"lsqb", 0x005B}, {"lsquo", 0x2018}, {"lsquor", 0x201A},
    {"Lstrok", 0x0141}, {"lstrok", 0x0142}, {"lt", 0x003C}, {"Lt", 0x226A},
    {"lthree", 0x22CB}, {"ltimes", 0x22C9}, {"ltri", 0x25C3}, {"ltrie", 0x22B4},
    {"ltrif", 0x25C2}, {"lvnE", 0x2268},
    {NULL, 0}
};

static const struct NameId namesM[] = {
    {"macr", 0x00AF}, {"male", 0x2642}, {"malt", 0x2720}, {"map", 0x21A6},
    {"marker", 0x25AE}, {"Mcy", 0x041C}, {"mcy", 0x043C}, {"mdash", 0x2014},
    {"Mgr", 0x039C}, {"mgr", 0x03BC}, {"micro", 0x00B5}, {"mid", 0x2223},
    {"middot", 0x00B7}, {"minus", 0x2212}, {"minusb", 0x229F}, {"mldr", 0x2026},
    {"mnplus", 0x2213}, {"models", 0x22A7}, {"mu", 0x03BC}, {"mumap", 0x22B8},
    {NULL, 0}
};

static const struct NameId namesN[] = {
    {"nabla", 0x2207}, {"Nacute", 0x0143}, {"nacute", 0x0144}, {"nap", 0x2249},
    {"napos", 0x0149}, {"natur", 0x266E},
    // "nbsp" would be 0x00A0; it is mapped to 32 on purpose to
    // make non-breaking space appear as space
    {"nbsp", 32},
    {"Ncaron", 0x0147}, {"ncaron", 0x0148}, {"Ncedil", 0x0145}, {"ncedil", 0x0146},
    {"ncong", 0x2247}, {"Ncy", 0x041D}, {"ncy", 0x043D}, {"ndash", 0x2013},
    {"ne", 0x2260}, {"nearr", 0x2197}, {"nequiv", 0x2262}, {"nexist", 0x2204},
    {"nge", 0x2271}, {"ngE", 0x2271}, {"nges", 0x2271}, {"Ngr", 0x039D},
    {"ngr", 0x03BD}, {"ngt", 0x226F}, {"nharr", 0x21AE}, {"nhArr", 0x21CE},
    {"ni", 0x220D}, {"NJcy", 0x040A}, {"njcy", 0x045A}, {"nlarr", 0x219A},
    {"nlArr", 0x21CD}, {"nldr", 0x2025}, {"nle", 0x2270}, {"nlE", 0x2270},
    {"nles", 0x2270}, {"nlt", 0x226E}, {"nltri", 0x22EA}, {"nltrie", 0x22EC},
    {"nmid", 0x2224}, {"not", 0x00AC}, {"notin", 0x2209}, {"npar", 0x2226},
    {"npr", 0x2280}, {"npre", 0x22E0}, {"nrarr", 0x219B}, {"nrArr", 0x21CF},
    {"nrtri", 0x22EB}, {"nrtrie", 0x22ED}, {"nsc", 0x2281}, {"nsce", 0x22E1},
    {"nsim", 0x2241}, {"nsime", 0x2244}, {"nsmid", 0xE2AA}, {"nspar", 0x2226},
    {"nsub", 0x2284}, {"nsube", 0x2288}, {"nsubE", 0x2288}, {"nsup", 0x2285},
    {"nsupe", 0x2289}, {"nsupE", 0x2289}, {"Ntilde", 0x00D1}, {"ntilde", 0x00F1},
    {"nu", 0x03BD}, {"num", 0x0023}, {"numero", 0x2116}, {"numsp", 0x2007},
    {"nvdash", 0x22AC}, {"nvDash", 0x22AD}, {"nVdash", 0x22AE}, {"nVDash", 0x22AF},
    {"nwarr", 0x2196},
    {NULL, 0}
};

static const struct NameId namesO[] = {
    {"Oacgr", 0x038C}, {"oacgr", 0x03CC}, {"Oacute", 0x00D3}, {"oacute", 0x00F3},
    {"oast", 0x229B}, {"ocir", 0x229A}, {"Ocirc", 0x00D4}, {"ocirc", 0x00F4},
    {"Ocy", 0x041E}, {"ocy", 0x043E}, {"odash", 0x229D}, {"Odblac", 0x0150},
    {"odblac", 0x0151}, {"odot", 0x2299}, {"OElig", 0x0152}, {"oelig", 0x0153},
    {"ogon", 0x02DB}, {"Ogr", 0x039F}, {"ogr", 0x03BF}, {"Ograve", 0x00D2},
    {"ograve", 0x00F2}, {"OHacgr", 0x038F}, {"ohacgr", 0x03CE}, {"OHgr", 0x03A9},
    {"ohgr", 0x03C9}, {"ohm", 0x2126}, {"olarr", 0x21BA}, {"Omacr", 0x014C},
    {"omacr", 0x014D}, {"Omega", 0x03A9}, {"omega", 0x03C9}, {"ominus", 0x2296},
    {"oplus", 0x2295}, {"or", 0x2228}, {"orarr", 0x21BB}, {"order", 0x2134},
    {"ordf", 0x00AA}, {"ordm", 0x00BA}, {"oS", 0x24C8}, {"Oslash", 0x00D8},
    {"oslash", 0x00F8}, {"osol", 0x2298}, {"Otilde", 0x00D5}, {"otilde", 0x00F5},
    {"otimes", 0x2297}, {"Ouml", 0x00D6}, {"ouml", 0x00F6},
    {NULL, 0}
};

static const struct NameId namesP[] = {
    {"par", 0x2225}, {"para", 0x00B6}, {"part", 0x2202}, {"Pcy", 0x041F},
    {"pcy", 0x043F}, {"percnt", 0x0025}, {"period", 0x002E}, {"permil", 0x2030},
    {"perp", 0x22A5}, {"Pgr", 0x03A0}, {"pgr", 0x03C0}, {"PHgr", 0x03A6},
    {"phgr", 0x03C6}, {"Phi", 0x03A6},
    // "phi" used to resolve only because lookups matched by prefix and
    // hit "phis"; it is listed explicitly (same value) now that lookups
    // are exact.
    {"phi", 0x03C6},
    {"phis", 0x03C6}, {"phiv", 0x03D5}, {"phmmat", 0x2133}, {"phone", 0x260E},
    {"Pi", 0x03A0}, {"pi", 0x03C0}, {"piv", 0x03D6}, {"planck", 0x210F},
    {"plus", 0x002B}, {"plusb", 0x229E}, {"plusdo", 0x2214}, {"plusmn", 0x00B1},
    {"pound", 0x00A3}, {"pr", 0x227A}, {"prap", 0x227E}, {"pre", 0x227C},
    {"prime", 0x2032}, {"Prime", 0x2033}, {"prnap", 0x22E8}, {"prnE", 0xE2B3},
    {"prnsim", 0x22E8}, {"prod", 0x220F}, {"prop", 0x221D}, {"prsim", 0x227E},
    {"PSgr", 0x03A8}, {"psgr", 0x03C8}, {"Psi", 0x03A8}, {"psi", 0x03C8},
    {"puncsp", 0x2008},
    {NULL, 0}
};

static const struct NameId namesQ[] = {
    {"quest", 0x003F}, {"quot", 0x0022},
    {NULL, 0}
};

static const struct NameId namesR[] = {
    {"rAarr", 0x21DB}, {"Racute", 0x0154}, {"racute", 0x0155}, {"radic", 0x221A},
    {"rang", 0x3009}, {"raquo", 0x00BB}, {"rarr", 0x2192}, {"Rarr", 0x21A0},
    {"rArr", 0x21D2}, {"rarr2", 0x21C9}, {"rarrhk", 0x21AA}, {"rarrlp", 0x21AC},
    {"rarrtl", 0x21A3}, {"rarrw", 0x219D}, {"Rcaron", 0x0158}, {"rcaron", 0x0159},
    {"Rcedil", 0x0156}, {"rcedil", 0x0157}, {"rceil", 0x2309}, {"rcub", 0x007D},
    {"Rcy", 0x0420}, {"rcy", 0x0440}, {"rdquo", 0x201D}, {"rdquor", 0x201C},
    {"real", 0x211C}, {"rect", 0x25AD}, {"reg", 0x00AE}, {"rfloor", 0x230B},
    {"Rgr", 0x03A1}, {"rgr", 0x03C1}, {"rhard", 0x21C1}, {"rharu", 0x21C0},
    {"rho", 0x03C1}, {"rhov", 0x03F1}, {"ring", 0x02DA}, {"rlarr2", 0x21C4},
    {"rlhar2", 0x21CC}, {"rpar", 0x0029}, {"rpargt", 0xE291}, {"rsh", 0x21B1},
    {"rsqb", 0x005D}, {"rsquo", 0x2019}, {"rsquor", 0x2018}, {"rthree", 0x22CC},
    {"rtimes", 0x22CA}, {"rtri", 0x25B9}, {"rtrie", 0x22B5}, {"rtrif", 0x25B8},
    {"rx", 0x211E},
    {NULL, 0}
};

static const struct NameId namesS[] = {
    {"Sacute", 0x015A}, {"sacute", 0x015B}, {"samalg", 0x2210}, {"sbsol", 0xFE68},
    {"sc", 0x227B}, {"scap", 0x227F}, {"Scaron", 0x0160}, {"scaron", 0x0161},
    {"sccue", 0x227D}, {"sce", 0x227D}, {"Scedil", 0x015E}, {"scedil", 0x015F},
    {"Scirc", 0x015C}, {"scirc", 0x015D}, {"scnap", 0x22E9}, {"scnE", 0xE2B5},
    {"scnsim", 0x22E9}, {"scsim", 0x227F}, {"Scy", 0x0421}, {"scy", 0x0441},
    {"sdot", 0x22C5}, {"sdotb", 0x22A1}, {"sect", 0x00A7}, {"semi", 0x003B},
    {"setmn", 0x2216}, {"sext", 0x2736}, {"sfgr", 0x03C2}, {"sfrown", 0x2322},
    {"Sgr", 0x03A3}, {"sgr", 0x03C3}, {"sharp", 0x266F}, {"SHCHcy", 0x0429},
    {"shchcy", 0x0449}, {"SHcy", 0x0428}, {"shcy", 0x0448}, {"shy", 0x00AD},
    {"Sigma", 0x03A3}, {"sigma", 0x03C3}, {"sigmav", 0x03C2}, {"sim", 0x223C},
    {"sime", 0x2243}, {"smid", 0xE301}, {"smile", 0x2323}, {"SOFTcy", 0x042C},
    {"softcy", 0x044C}, {"sol", 0x002F}, {"spades", 0x2660}, {"spar", 0x2225},
    {"sqcap", 0x2293}, {"sqcup", 0x2294}, {"sqsub", 0x228F}, {"sqsube", 0x2291},
    {"sqsup", 0x2290}, {"sqsupe", 0x2292}, {"squ", 0x25A1}, {"square", 0x25A1},
    {"squf", 0x25AA}, {"ssetmn", 0x2216}, {"ssmile", 0x2323}, {"sstarf", 0x22C6},
    {"star", 0x22C6}, {"starf", 0x2605}, {"sub", 0x2282}, {"Sub", 0x22D0},
    {"sube", 0x2286}, {"subE", 0x2286}, {"subne", 0x228A}, {"subnE", 0x228A},
    {"sum", 0x2211}, {"sung", 0x2669}, {"sup", 0x2283}, {"Sup", 0x22D1},
    {"sup1", 0x00B9}, {"sup2", 0x00B2}, {"sup3", 0x00B3}, {"supe", 0x2287},
    {"supE", 0x2287}, {"supne", 0x228B}, {"supnE", 0x228B}, {"szlig", 0x00DF},
    {NULL, 0}
};

static const struct NameId namesT[] = {
    {"target", 0x2316}, {"tau", 0x03C4}, {"Tcaron", 0x0164}, {"tcaron", 0x0165},
    {"Tcedil", 0x0162}, {"tcedil", 0x0163}, {"Tcy", 0x0422}, {"tcy", 0x0442},
    {"tdot", 0x20DB}, {"telrec", 0x2315}, {"Tgr", 0x03A4}, {"tgr", 0x03C4},
    {"there4", 0x2234}, {"Theta", 0x0398},
    // "theta" used to resolve only because lookups matched by prefix and
    // hit "thetas"; it is listed explicitly (same value) now that lookups
    // are exact.
    {"theta", 0x03B8},
    {"thetas", 0x03B8}, {"thetav", 0x03D1}, {"THgr", 0x0398}, {"thgr", 0x03B8},
    {"thinsp", 0x2009}, {"thkap", 0x2248}, {"thksim", 0x223C}, {"THORN", 0x00DE},
    {"thorn", 0x00FE}, {"tilde", 0x02DC}, {"times", 0x00D7}, {"timesb", 0x22A0},
    {"top", 0x22A4}, {"tprime", 0x2034}, {"trade", 0x2122}, {"trie", 0x225C},
    {"TScy", 0x0426}, {"tscy", 0x0446}, {"TSHcy", 0x040B}, {"tshcy", 0x045B},
    {"Tstrok", 0x0166}, {"tstrok", 0x0167}, {"twixt", 0x226C},
    {NULL, 0}
};

static const struct NameId namesU[] = {
    {"Uacgr", 0x038E}, {"uacgr", 0x03CD}, {"Uacute", 0x00DA}, {"uacute", 0x00FA},
    {"uarr", 0x2191}, {"uArr", 0x21D1}, {"uarr2", 0x21C8}, {"Ubrcy", 0x040E},
    {"ubrcy", 0x045E}, {"Ubreve", 0x016C}, {"ubreve", 0x016D}, {"Ucirc", 0x00DB},
    {"ucirc", 0x00FB}, {"Ucy", 0x0423}, {"ucy", 0x0443}, {"Udblac", 0x0170},
    {"udblac", 0x0171}, {"udiagr", 0x03B0}, {"Udigr", 0x03AB}, {"udigr", 0x03CB},
    {"Ugr", 0x03A5}, {"ugr", 0x03C5}, {"Ugrave", 0x00D9}, {"ugrave", 0x00F9},
    {"uharl", 0x21BF}, {"uharr", 0x21BE}, {"uhblk", 0x2580}, {"ulcorn", 0x231C},
    {"ulcrop", 0x230F}, {"Umacr", 0x016A}, {"umacr", 0x016B}, {"uml", 0x00A8},
    {"Uogon", 0x0172}, {"uogon", 0x0173}, {"uplus", 0x228E}, {"upsi", 0x03C5},
    {"Upsi", 0x03D2}, {"urcorn", 0x231D}, {"urcrop", 0x230E}, {"Uring", 0x016E},
    {"uring", 0x016F}, {"Utilde", 0x0168}, {"utilde", 0x0169}, {"utri", 0x25B5},
    {"utrif", 0x25B4}, {"Uuml", 0x00DC}, {"uuml", 0x00FC},
    {NULL, 0}
};

static const struct NameId namesV[] = {
    {"varr", 0x2195}, {"vArr", 0x21D5}, {"Vcy", 0x0412}, {"vcy", 0x0432},
    {"vdash", 0x22A2}, {"vDash", 0x22A8}, {"Vdash", 0x22A9}, {"veebar", 0x22BB},
    {"vellip", 0x22EE}, {"verbar", 0x007C}, {"Verbar", 0x2016}, {"vltri", 0x22B2},
    {"vprime", 0x2032}, {"vprop", 0x221D}, {"vrtri", 0x22B3}, {"vsubne", 0x228A},
    {"vsubnE", 0xE2B8}, {"vsupne", 0x228B}, {"vsupnE", 0x228B}, {"Vvdash", 0x22AA},
    {NULL, 0}
};

static const struct NameId namesW[] = {
    {"Wcirc", 0x0174}, {"wcirc", 0x0175}, {"wedgeq", 0x2259}, {"weierp", 0x2118},
    {"wreath", 0x2240},
    {NULL, 0}
};

static const struct NameId namesX[] = {
    {"xcirc", 0x25CB}, {"xdtri", 0x25BD}, {"Xgr", 0x039E}, {"xgr", 0x03BE},
    {"xharr", 0x2194}, {"xhArr", 0x2194}, {"Xi", 0x039E}, {"xi", 0x03BE},
    {"xlArr", 0x21D0}, {"xrArr", 0x21D2}, {"xutri", 0x25B3},
    {NULL, 0}
};

static const struct NameId namesY[] = {
    {"Yacute", 0x00DD}, {"yacute", 0x00FD}, {"YAcy", 0x042F}, {"yacy", 0x044F},
    {"Ycirc", 0x0176}, {"ycirc", 0x0177}, {"Ycy", 0x042B}, {"ycy", 0x044B},
    {"yen", 0x00A5}, {"YIcy", 0x0407}, {"yicy", 0x0457}, {"YUcy", 0x042E},
    {"yucy", 0x044E}, {"yuml", 0x00FF}, {"Yuml", 0x0178},
    {NULL, 0}
};

static const struct NameId namesZ[] = {
    {"Zacute", 0x0179}, {"zacute", 0x017A}, {"Zcaron", 0x017D}, {"zcaron", 0x017E},
    {"Zcy", 0x0417}, {"zcy", 0x0437}, {"Zdot", 0x017B}, {"zdot", 0x017C},
    {"zeta", 0x03B6}, {"Zgr", 0x0396}, {"zgr", 0x03B6}, {"ZHcy", 0x0416},
    {"zhcy", 0x0436},
    {NULL, 0}
};

// @todo@ order namesTable and names? by frequency
// Indexed by (lower-cased first letter - 'a'); the trailing NULL is never
// indexed by HtmlNamedEntity().
static const struct NameId *const namesTable[] = {
    namesA, namesB, namesC, namesD, namesE, namesF, namesG, namesH, namesI,
    namesJ, namesK, namesL, namesM, namesN, namesO, namesP, namesQ, namesR,
    namesS, namesT, namesU, namesV, namesW, namesX, namesY, namesZ, NULL
};

/*********************************************
 * Look up a named character entity.
 * Params:
 *      p       start of the entity name (without '&' and ';'); it does
 *              not have to be NUL-terminated
 *      length  number of bytes of p that make up the name
 * Returns:
 *      the table value for the name, or -1 after reporting
 *      "unrecognized character entity" through error() when the name is
 *      not in the tables (or p is NULL / length is not positive).
 *
 * The first letter selects the table case-insensitively; the name itself
 * must then match a table entry exactly and case-sensitively.
 *
 * NOTE(review): error() is not declared in this file; presumably it is
 * supplied by the headers of the GCC build -- confirm.
 */
int HtmlNamedEntity(unsigned char *p, int length)
{
    if (p && length > 0)
    {
        int tableIndex = tolower(*p) - 'a';

        if (tableIndex >= 0 && tableIndex < 26)
        {
            const struct NameId *names = namesTable[tableIndex];
            int i;

            for (i = 0; names[i].name; i++)
            {
                // Exact match only: a bare strncmp() over `length` bytes
                // would also accept any prefix of a longer table name.
                if (entityNameEquals(names[i].name, p, (size_t)length))
                    return names[i].value;
            }
        }
    }
    // Clamp the precision and the pointer so that the message itself is
    // safe for a NULL p or a non-positive length.
    error("unrecognized character entity \"%.*s\"",
          length > 0 ? length : 0, p ? (const char *)p : "");
    return -1;
}

#else

//TODO: Merge Walter's list with Thomas'

static const struct NameId names[] = {
    // Entities
    {"quot", 34}, {"amp", 38}, {"lt", 60}, {"gt", 62},
    {"OElig", 338}, {"oelig", 339}, {"Scaron", 352}, {"scaron", 353},
    {"Yuml", 376}, {"circ", 710}, {"tilde", 732}, {"ensp", 8194},
    {"emsp", 8195}, {"thinsp", 8201}, {"zwnj", 8204}, {"zwj", 8205},
    {"lrm", 8206}, {"rlm", 8207}, {"ndash", 8211}, {"mdash", 8212},
    {"lsquo", 8216}, {"rsquo", 8217}, {"sbquo", 8218}, {"ldquo", 8220},
    {"rdquo", 8221}, {"bdquo", 8222}, {"dagger", 8224}, {"Dagger", 8225},
    {"permil", 8240}, {"lsaquo", 8249}, {"rsaquo", 8250}, {"euro", 8364},

    // Latin-1 (ISO-8859-1) Entities
    {"nbsp", 160}, {"iexcl", 161}, {"cent", 162}, {"pound", 163},
    {"curren", 164}, {"yen", 165}, {"brvbar", 166}, {"sect", 167},
    {"uml", 168}, {"copy", 169}, {"ordf", 170}, {"laquo", 171},
    {"not", 172}, {"shy", 173}, {"reg", 174}, {"macr", 175},
    {"deg", 176}, {"plusmn", 177}, {"sup2", 178}, {"sup3", 179},
    {"acute", 180}, {"micro", 181}, {"para", 182}, {"middot", 183},
    {"cedil", 184}, {"sup1", 185}, {"ordm", 186}, {"raquo", 187},
    {"frac14", 188}, {"frac12", 189}, {"frac34", 190}, {"iquest", 191},
    {"Agrave", 192}, {"Aacute", 193}, {"Acirc", 194}, {"Atilde", 195},
    {"Auml", 196}, {"Aring", 197}, {"AElig", 198}, {"Ccedil", 199},
    {"Egrave", 200}, {"Eacute", 201}, {"Ecirc", 202}, {"Euml", 203},
    {"Igrave", 204}, {"Iacute", 205}, {"Icirc", 206}, {"Iuml", 207},
    {"ETH", 208}, {"Ntilde", 209}, {"Ograve", 210}, {"Oacute", 211},
    {"Ocirc", 212}, {"Otilde", 213}, {"Ouml", 214}, {"times", 215},
    {"Oslash", 216}, {"Ugrave", 217}, {"Uacute", 218}, {"Ucirc", 219},
    {"Uuml", 220}, {"Yacute", 221}, {"THORN", 222}, {"szlig", 223},
    {"agrave", 224}, {"aacute", 225}, {"acirc", 226}, {"atilde", 227},
    {"auml", 228}, {"aring", 229}, {"aelig", 230}, {"ccedil", 231},
    {"egrave", 232}, {"eacute", 233}, {"ecirc", 234}, {"euml", 235},
    {"igrave", 236}, {"iacute", 237}, {"icirc", 238}, {"iuml", 239},
    {"eth", 240}, {"ntilde", 241}, {"ograve", 242}, {"oacute", 243},
    {"ocirc", 244}, {"otilde", 245}, {"ouml", 246}, {"divide", 247},
    {"oslash", 248}, {"ugrave", 249}, {"uacute", 250}, {"ucirc", 251},
    {"uuml", 252}, {"yacute", 253}, {"thorn", 254}, {"yuml", 255},

    // Symbols and Greek letter entities
    {"fnof", 402}, {"Alpha", 913}, {"Beta", 914}, {"Gamma", 915},
    {"Delta", 916}, {"Epsilon", 917}, {"Zeta", 918}, {"Eta", 919},
    {"Theta", 920}, {"Iota", 921}, {"Kappa", 922}, {"Lambda", 923},
    {"Mu", 924}, {"Nu", 925}, {"Xi", 926}, {"Omicron", 927},
    {"Pi", 928}, {"Rho", 929}, {"Sigma", 931}, {"Tau", 932},
    {"Upsilon", 933}, {"Phi", 934}, {"Chi", 935}, {"Psi", 936},
    {"Omega", 937}, {"alpha", 945}, {"beta", 946}, {"gamma", 947},
    {"delta", 948}, {"epsilon", 949}, {"zeta", 950}, {"eta", 951},
    {"theta", 952}, {"iota", 953}, {"kappa", 954}, {"lambda", 955},
    {"mu", 956}, {"nu", 957}, {"xi", 958}, {"omicron", 959},
    {"pi", 960}, {"rho", 961}, {"sigmaf", 962}, {"sigma", 963},
    {"tau", 964}, {"upsilon", 965}, {"phi", 966}, {"chi", 967},
    {"psi", 968}, {"omega", 969}, {"thetasym", 977}, {"upsih", 978},
    {"piv", 982}, {"bull", 8226}, {"hellip", 8230}, {"prime", 8242},
    {"Prime", 8243}, {"oline", 8254}, {"frasl", 8260}, {"weierp", 8472},
    {"image", 8465}, {"real", 8476}, {"trade", 8482}, {"alefsym", 8501},
    {"larr", 8592}, {"uarr", 8593}, {"rarr", 8594}, {"darr", 8595},
    {"harr", 8596}, {"crarr", 8629}, {"lArr", 8656}, {"uArr", 8657},
    {"rArr", 8658}, {"dArr", 8659}, {"hArr", 8660}, {"forall", 8704},
    {"part", 8706}, {"exist", 8707}, {"empty", 8709}, {"nabla", 8711},
    {"isin", 8712}, {"notin", 8713}, {"ni", 8715}, {"prod", 8719},
    {"sum", 8721}, {"minus", 8722}, {"lowast", 8727}, {"radic", 8730},
    {"prop", 8733}, {"infin", 8734}, {"ang", 8736}, {"and", 8743},
    {"or", 8744}, {"cap", 8745}, {"cup", 8746}, {"int", 8747},
    {"there4", 8756}, {"sim", 8764}, {"cong", 8773}, {"asymp", 8776},
    {"ne", 8800}, {"equiv", 8801}, {"le", 8804}, {"ge", 8805},
    {"sub", 8834}, {"sup", 8835}, {"nsub", 8836}, {"sube", 8838},
    {"supe", 8839}, {"oplus", 8853}, {"otimes", 8855}, {"perp", 8869},
    {"sdot", 8901}, {"lceil", 8968}, {"rceil", 8969}, {"lfloor", 8970},
    {"rfloor", 8971}, {"lang", 9001}, {"rang", 9002}, {"loz", 9674},
    {"spades", 9824}, {"clubs", 9827}, {"hearts", 9829}, {"diams", 9830},
};

/*********************************************
 * Look up a named character entity.
 * Params:
 *      p       start of the entity name (without '&' and ';'); it does
 *              not have to be NUL-terminated
 *      length  number of bytes of p that make up the name
 * Returns:
 *      the table value for the name, or -1 if the name is not in the
 *      table (or p is NULL / length is not positive).
 */
int HtmlNamedEntity(unsigned char *p, int length)
{
    size_t i;

    if (!p || length <= 0)
        return -1;

    // BUG: this is a dumb, slow linear search
    for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
    {
        // Entries are case sensitive
        if (entityNameEquals(names[i].name, p, (size_t)length))
            return names[i].value;
    }
    return -1;
}

#endif