/++

MD4C: Markdown parser for C
(http://github.com/mity/md4c)

Copyright:

Copyright (c) 2016-2019 Martin Mitas
Copyright (c) 2019 Guillaume Piolat (D translation as commonmarkd package: https://github.com/AuburnSounds/commonmark-d )
Somewhat modified by Adam D. Ruppe in 2024.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
+/
module arsd.markdown;

/// Options for Markdown parsing. Each basic option is a single bit, so
/// options can be combined with `|`. Bit 7 (0x0080) is not assigned.
enum MarkdownFlag : int
{
    /// Collapse non-trivial whitespace into single ' '
    collapseWhitespace       = 1 << 0,
    /// Do not require space in ATX headers ( ###header )
    permissiveATXHeaders     = 1 << 1,
    /// Recognize URLs as autolinks even without '<', '>'
    permissiveURLAutoLinks   = 1 << 2,
    /// Recognize e-mails as autolinks even without '<', '>' and 'mailto:'
    permissiveEmailAutoLinks = 1 << 3,
    /// Disable indented code blocks. (Only fenced code works.)
    noIndentedCodeBlocks     = 1 << 4,
    /// Disable raw HTML blocks.
    noHTMLBlocks             = 1 << 5,
    /// Disable raw HTML (inline).
    noHTMLSpans              = 1 << 6,
    /// Enable tables extension.
    tablesExtension          = 1 << 8,
    /// Enable strikethrough extension.
    enableStrikeThrough      = 1 << 9,
    /// Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.')
    permissiveWWWAutoLinks   = 1 << 10,
    /// Enable task list extension.
    enableTaskLists          = 1 << 11,
    /// Enable $ and $$ containing LaTeX equations.
    latexMathSpans           = 1 << 12,

    /// Recognize e-mails, URL and WWW links
    permissiveAutoLinks = permissiveURLAutoLinks | permissiveEmailAutoLinks | permissiveWWWAutoLinks,
    /// Disable raw HTML.
    noHTML = noHTMLSpans | noHTMLBlocks,

    /* Convenient sets of flags corresponding to well-known Markdown dialects.
     *
     * Note we may only support subset of features of the referred dialect.
     * The constant just enables those extensions which bring us as close as
     * possible given what features we implement.
     *
     * ABI compatibility note: Meaning of these can change in time as new
     * extensions, bringing the dialect closer to the original, are implemented.
     */

    /// CommonMark
    dialectCommonMark = 0,
    /// Github Flavoured Markdown
    dialectGitHub = enableTaskLists | enableStrikeThrough | tablesExtension | permissiveAutoLinks,
}

/// Parses a Markdown input, returns HTML. `flags` set the particular Markdown dialect that is used.
/**
Params:
    input = Markdown source text. Must not be longer than `uint.max` bytes,
            because the length is handed to the renderer as a 32-bit value.
    flags = Dialect/extension bit mask, see `MarkdownFlag`.

Returns: A GC-allocated copy of everything the renderer emitted.

Throws: `Exception` when `input` is longer than `uint.max` bytes;
        `OutOfMemoryError` when the output buffer cannot be grown.
*/
string convertMarkdownToHTML(const(char)[] input, MarkdownFlag flags = MarkdownFlag.dialectCommonMark)
{
    import core.exception : onOutOfMemoryError;
    import core.stdc.stdlib;

    /* malloc-backed output accumulator. A pointer to it travels through the
     * renderer as `void*` user data and comes back in appendCallback(). */
    static struct GrowableBuffer
    {
    nothrow:
    @nogc:
        char* buf = null;
        size_t size = 0;       /* Bytes of `buf` holding output so far. */
        size_t allocated = 0;  /* Capacity of `buf` in bytes. */

        /* The struct owns `buf`; a bitwise copy would free it twice. */
        @disable this(this);

        /* Makes sure at least `atLeastthisSize` bytes are allocated. */
        void ensureSize(size_t atLeastthisSize)
        {
            if (atLeastthisSize <= allocated)
                return;

            // TODO: enhancing this estimation probably beneficial to performance
            const size_t newCapacity = 2 * allocated + atLeastthisSize + 1;

            /* Keep the old pointer until realloc is known to have succeeded,
             * otherwise append() would write through null. */
            char* grown = cast(char*) realloc(buf, newCapacity);
            if (grown is null)
                onOutOfMemoryError();

            buf = grown;
            allocated = newCapacity;
        }

        ~this()
        {
            free(buf); /* free(null) is a no-op */
            buf = null;
            size = 0;
            allocated = 0;
        }

        /* Copies `suffix` to the end of the accumulated output. */
        void append(const(char)[] suffix)
        {
            size_t L = suffix.length;
            ensureSize(size + L);
            buf[size..size+L] = suffix[0..L];
            size += L;
        }

        /* View of the accumulated output; valid until the next append(). */
        const(char)[] getData()
        {
            return buf[0..size];
        }

        /* Output sink handed to md_render_html(); `userData` is the GrowableBuffer. */
        static void appendCallback(const(char)* chars, uint size, void* userData)
        {
            GrowableBuffer* gb = cast(GrowableBuffer*) userData;
            gb.append(chars[0..size]);
        }
    }

    /* The renderer takes the length as uint; refuse instead of silently
     * rendering only `input.length % 2^32` bytes. */
    if (input.length > uint.max)
        throw new Exception("convertMarkdownToHTML: input longer than uint.max bytes is not supported");

    GrowableBuffer gb;
    gb.ensureSize(input.length); // TODO: enhancing this estimation probably beneficial to performance

    //int renderFlags = MD_RENDER_FLAG_DEBUG;
    int renderFlags = 0;

    /* NOTE(review): the renderer's status is not acted upon, so whatever was
     * emitted before a failure is still returned (historical best-effort
     * behaviour). Confirm whether callers would rather get an exception. */
    int ret = md_render_html(input.ptr,
                             cast(uint) input.length,
                             &GrowableBuffer.appendCallback,
                             &gb, flags, renderFlags);
    return gb.getData.idup; // Note: this is the only GC-using stuff
}




import core.stdc.string;
import core.stdc.stdio;
import core.stdc.stdlib: malloc, free;

nothrow:
@nogc:
@system:

// Compatibility with older DMDFE
static if (__VERSION__ < 2079)
{
    import core.stdc.stdlib: _compare_fp_t;
    // Provide @nogc nothrow bsearch and qsort for older compilers
    extern (C):
    @system:
    inout(void)* bsearch(scope const void* key, scope inout(void)* base, size_t nmemb, size_t size, _compare_fp_t compar);
    void qsort(scope void* base, size_t nmemb, size_t size, _compare_fp_t compar);
}
else
{
    import core.stdc.stdlib: qsort, bsearch;
}

alias MD_CHAR =
char;
alias MD_SIZE = uint;
alias MD_OFFSET = uint;

/* Block represents a part of document hierarchy structure like a paragraph
 * or list item.
 */
alias MD_BLOCKTYPE = int;
enum : MD_BLOCKTYPE
{
    /* <body>...</body> */
    MD_BLOCK_DOC = 0,

    /* <blockquote>...</blockquote> */
    MD_BLOCK_QUOTE,

    /* <ul>...</ul>
     * Detail: Structure MD_BLOCK_UL_DETAIL. */
    MD_BLOCK_UL,

    /* <ol>...</ol>
     * Detail: Structure MD_BLOCK_OL_DETAIL. */
    MD_BLOCK_OL,

    /* <li>...</li>
     * Detail: Structure MD_BLOCK_LI_DETAIL. */
    MD_BLOCK_LI,

    /* <hr> */
    MD_BLOCK_HR,

    /* <h1>...</h1> (for levels up to 6)
     * Detail: Structure MD_BLOCK_H_DETAIL. */
    MD_BLOCK_H,

    /* <pre><code>...</code></pre>
     * Detail: Structure MD_BLOCK_CODE_DETAIL.
     * Note the text lines within code blocks are terminated with '\n'
     * instead of explicit MD_TEXT_BR. */
    MD_BLOCK_CODE,

    /* Raw HTML block. This itself does not correspond to any particular HTML
     * tag. The contents of it _is_ raw HTML source intended to be put
     * in verbatim form to the HTML output. */
    MD_BLOCK_HTML,

    /* <p>...</p> */
    MD_BLOCK_P,

    /* <table>...</table> and its contents.
     * Detail: Structure MD_BLOCK_TD_DETAIL (used with MD_BLOCK_TH and MD_BLOCK_TD)
     * Note all of these are used only if extension MD_FLAG_TABLES is enabled. */
    MD_BLOCK_TABLE,
    MD_BLOCK_THEAD,
    MD_BLOCK_TBODY,
    MD_BLOCK_TR,
    MD_BLOCK_TH,
    MD_BLOCK_TD
}

/* Span represents an in-line piece of a document which should be rendered with
 * the same font, color and other attributes. A sequence of spans forms a block
 * like paragraph or list item. */
alias MD_SPANTYPE = int;
enum : MD_SPANTYPE
{
    /* <em>...</em> */
    MD_SPAN_EM,

    /* <strong>...</strong> */
    MD_SPAN_STRONG,

    /* <a href="xxx">...</a>
     * Detail: Structure MD_SPAN_A_DETAIL. */
    MD_SPAN_A,

    /* <img src="xxx"> (the span's contents are the image text)
     * Detail: Structure MD_SPAN_IMG_DETAIL.
     * Note: Image text can contain nested spans and even nested images.
     * If rendered into ALT attribute of HTML <img> tag, it's responsibility
     * of the renderer to deal with it.
     */
    MD_SPAN_IMG,

    /* <code>...</code> */
    MD_SPAN_CODE,

    /* <del>...</del>
     * Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled.
     */
    MD_SPAN_DEL,

    /* For recognizing inline ($) and display ($$) equations
     * Note: Recognized only when MD_FLAG_LATEXMATHSPANS is enabled.
     */
    MD_SPAN_LATEXMATH,
    MD_SPAN_LATEXMATH_DISPLAY
}

/* Text is the actual textual contents of span. */
alias MD_TEXTTYPE = int;
enum : MD_TEXTTYPE
{
    /* Normal text. */
    MD_TEXT_NORMAL = 0,

    /* null character. CommonMark requires replacing null character with
     * the replacement char U+FFFD, so this allows caller to do that easily. */
    MD_TEXT_NULLCHAR,

    /* Line breaks.
     * Note these are not sent from blocks with verbatim output (MD_BLOCK_CODE
     * or MD_BLOCK_HTML). In such cases, '\n' is part of the text itself. */
    MD_TEXT_BR,         /* <br> (hard break) */
    MD_TEXT_SOFTBR,     /* '\n' in source text where it is not semantically meaningful (soft break) */

    /* Entity.
     * (a) Named entity, e.g. &nbsp;
     *     (Note MD4C does not have a list of known entities.
     *     Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is
     *     treated as a named entity.)
     * (b) Numerical entity, e.g. &#1234;
     * (c) Hexadecimal entity, e.g. &#x12AB;
     *
     * As MD4C is mostly encoding agnostic, application gets the verbatim
     * entity text into the MD_PARSER.text callback. */
    MD_TEXT_ENTITY,

    /* Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`).
     * If it is inside MD_BLOCK_CODE, it includes spaces for indentation and
     * '\n' for new lines. MD_TEXT_BR and MD_TEXT_SOFTBR are not sent for this
     * kind of text. */
    MD_TEXT_CODE,

    /* Text is a raw HTML. If it is contents of a raw HTML block (i.e. not
     * an inline raw HTML), then MD_TEXT_BR and MD_TEXT_SOFTBR are not used.
     * The text contains verbatim '\n' for the new lines. */
    MD_TEXT_HTML,

    /* Text is inside an equation. This is processed the same way as inlined code
     * spans (`code`). */
    MD_TEXT_LATEXMATH
}


/* Alignment enumeration. */

alias MD_ALIGN = int;
enum : MD_ALIGN
{
    MD_ALIGN_DEFAULT = 0,   /* When unspecified. */
    MD_ALIGN_LEFT,
    MD_ALIGN_CENTER,
    MD_ALIGN_RIGHT
}


/* String attribute.
 *
 * This wraps strings which are outside of a normal text flow and which are
 * propagated within various detailed structures, but which still may contain
 * string portions of different types like e.g. entities.
 *
 * So, for example, lets consider an image has a title attribute string
 * set to "foo &quot; bar". (Note the string size is 14.)
 *
 * Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following:
 *  -- [0]: "foo "   (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0)
 *  -- [1]: "&quot;" (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4)
 *  -- [2]: " bar"   (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10)
 *  -- [3]: (n/a)    (n/a                              ; substr_offsets[3] == 14)
 *
 * Note that these conditions are guaranteed:
 *  -- substr_offsets[0] == 0
 *  -- substr_offsets[LAST+1] == size
 *  -- Only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR substrings can appear.
 */
struct MD_ATTRIBUTE
{
    const (MD_CHAR)* text;              /* The attribute string; `size` characters long. */
    MD_SIZE size;
    const (MD_TEXTTYPE)* substr_types;  /* Type of each substring, see the example above. */
    const (MD_OFFSET)* substr_offsets;  /* Start offset of each substring, plus one final entry equal to `size`. */
}


/* Detailed info for MD_BLOCK_UL. */
struct MD_BLOCK_UL_DETAIL
{
    int is_tight;           /* Non-zero if tight list, zero if loose. */
    MD_CHAR mark;           /* Item bullet character in MarkDown source of the list, e.g. '-', '+', '*'. */
}

/* Detailed info for MD_BLOCK_OL. */
struct MD_BLOCK_OL_DETAIL
{
    uint start;             /* Start index of the ordered list. */
    int is_tight;           /* Non-zero if tight list, zero if loose. */
    MD_CHAR mark_delimiter; /* Character delimiting the item marks in MarkDown source, e.g. '.' or ')' */
}

/* Detailed info for MD_BLOCK_LI. */
struct MD_BLOCK_LI_DETAIL
{
    int is_task;                /* Can be non-zero only with MD_FLAG_TASKLISTS */
    MD_CHAR task_mark;          /* If is_task, then one of 'x', 'X' or ' '. Undefined otherwise. */
    MD_OFFSET task_mark_offset; /* If is_task, then offset in the input of the char between '[' and ']'. */
}

/* Detailed info for MD_BLOCK_H. */
struct MD_BLOCK_H_DETAIL
{
    uint level;             /* Header level (1 - 6) */
}

/* Detailed info for MD_BLOCK_CODE. */
struct MD_BLOCK_CODE_DETAIL
{
    MD_ATTRIBUTE info;
    MD_ATTRIBUTE lang;
    MD_CHAR fence_char;     /* The character used for fenced code block; or zero for indented code block. */
}

/* Detailed info for MD_BLOCK_TH and MD_BLOCK_TD.
*/
struct MD_BLOCK_TD_DETAIL
{
    MD_ALIGN align_;    /* One of the MD_ALIGN_xxx values. */
}

/* Detailed info for MD_SPAN_A. */
struct MD_SPAN_A_DETAIL
{
    MD_ATTRIBUTE href;
    MD_ATTRIBUTE title;
}

/* Detailed info for MD_SPAN_IMG. */
struct MD_SPAN_IMG_DETAIL
{
    MD_ATTRIBUTE src;
    MD_ATTRIBUTE title;
}


/* Flags specifying extensions/deviations from CommonMark specification.
 *
 * By default (when MD_PARSER.flags == 0), we follow CommonMark specification.
 * The following flags may allow some extensions or deviations from it.
 *
 * The numeric values are the same as those of the public MarkdownFlag enum.
 */
enum
{
    MD_FLAG_COLLAPSEWHITESPACE = 0x0001, /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */
    MD_FLAG_PERMISSIVEATXHEADERS = 0x0002, /* Do not require space in ATX headers ( ###header ) */
    MD_FLAG_PERMISSIVEURLAUTOLINKS = 0x0004, /* Recognize URLs as autolinks even without '<', '>' */
    MD_FLAG_PERMISSIVEEMAILAUTOLINKS = 0x0008, /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
    MD_FLAG_NOINDENTEDCODEBLOCKS = 0x0010, /* Disable indented code blocks. (Only fenced code works.) */
    MD_FLAG_NOHTMLBLOCKS = 0x0020, /* Disable raw HTML blocks. */
    MD_FLAG_NOHTMLSPANS = 0x0040, /* Disable raw HTML (inline). */
    MD_FLAG_TABLES = 0x0100, /* Enable tables extension. */
    MD_FLAG_STRIKETHROUGH = 0x0200, /* Enable strikethrough extension. */
    MD_FLAG_PERMISSIVEWWWAUTOLINKS = 0x0400, /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
    MD_FLAG_TASKLISTS = 0x0800, /* Enable task list extension. */
    MD_FLAG_LATEXMATHSPANS = 0x1000, /* Enable $ and $$ containing LaTeX equations. */

    MD_FLAG_PERMISSIVEAUTOLINKS = MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS,
    MD_FLAG_NOHTML = MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS,

    /* Convenient sets of flags corresponding to well-known Markdown dialects.
     *
     * Note we may only support subset of features of the referred dialect.
     * The constant just enables those extensions which bring us as close as
     * possible given what features we implement.
     *
     * ABI compatibility note: Meaning of these can change in time as new
     * extensions, bringing the dialect closer to the original, are implemented.
     */
    MD_DIALECT_COMMONMARK = 0,
    MD_DIALECT_GITHUB = (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS),
}

/* Parser structure: dialect options plus the callbacks invoked while parsing.
 */
struct MD_PARSER
{
nothrow:
@nogc:
    /* Reserved. Set to zero.
     */
    uint abi_version;

    /* Dialect options. Bitmask of MD_FLAG_xxxx values.
     */
    uint flags;

    /* Caller-provided rendering callbacks.
     *
     * For some block/span types, more detailed information is provided in a
     * type-specific structure pointed by the argument 'detail'.
     *
     * The last argument of all callbacks, 'userdata', is just propagated from
     * md_parse() and is available for any use by the application.
     *
     * Note any strings provided to the callbacks as their arguments or as
     * members of any detail structure are generally not zero-terminated.
     * Application has to take the respective size information into account.
     *
     * Callbacks may abort further parsing of the document by returning non-zero.
     */
    int function(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/) enter_block;
    int function(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/) leave_block;

    int function(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/) enter_span;
    int function(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/) leave_span;

    int function(MD_TEXTTYPE /*type*/, const(MD_CHAR)* /*text*/, MD_SIZE /*size*/, void* /*userdata*/) text;

    /* Debug callback. Optional (may be null).
     *
     * If provided and something goes wrong, this function gets called.
     * This is intended for debugging and problem diagnosis for developers;
     * it is not intended to provide any errors suitable for displaying to an
     * end user.
     */
    void function(const(char)* /*msg*/, void* /*userdata*/) debug_log;

    /* Reserved. Set to null.
     */
    void function() syntax;
}


/*****************************
 ***  Miscellaneous Stuff  ***
 *****************************/


/* Misc. macros. */

enum TRUE = 1;
enum FALSE = 0;


/************************
 ***  Internal Types  ***
 ************************/

/* These are omnipresent so lets save some typing. */
alias CHAR = MD_CHAR;
alias SZ = MD_SIZE;
alias OFF = MD_OFFSET;

/* During analyzes of inline marks, we need to manage some "mark chains",
 * of (yet unresolved) openers. This structure holds start/end of the chain.
 * The chain internals are then realized through MD_MARK::prev and ::next.
 */
struct MD_MARKCHAIN
{
    int head;   /* Index of first mark in the chain, or -1 if empty. */
    int tail;   /* Index of last mark in the chain, or -1 if empty. */
}

/* First and last index into MD_CTX.mark_chains of the chains whose accessors
 * are named *_OPENERS (indices 0 and 1 are PTR_CHAIN and TABLECELLBOUNDARIES). */
enum OPENERS_CHAIN_FIRST = 2;
enum OPENERS_CHAIN_LAST = 11;

/* Context propagated through all the parsing. */
struct MD_CTX
{
nothrow:
@nogc:

    /* Immutable stuff (parameters of md_parse()). */
    const(CHAR)* text;
    SZ size;
    MD_PARSER parser;
    void* userdata;

    /* When this is true, it allows some optimizations. */
    int doc_ends_with_newline;

    /* Helper temporary growing buffer. */
    CHAR* buffer;
    uint alloc_buffer;

    /* Reference definitions. */
    MD_REF_DEF* ref_defs;
    int n_ref_defs;
    int alloc_ref_defs;
    void** ref_def_hashtable;
    int ref_def_hashtable_size;

    /* Stack of inline/span markers.
     * This is only used for parsing a single block contents but by storing it
     * here we may reuse the stack for subsequent blocks; i.e. we have fewer
     * (re)allocations. */
    MD_MARK* marks;
    int n_marks;
    int alloc_marks;

    ubyte[256] mark_char_map;
    /* For resolving of inline spans. */
    MD_MARKCHAIN[12] mark_chains;

    /* Named accessors for the individual slots of mark_chains. */
    MD_MARKCHAIN* PTR_CHAIN() return { return &mark_chains[0]; }
    MD_MARKCHAIN* TABLECELLBOUNDARIES() return { return &mark_chains[1]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_extraword_mod3_0() return { return &mark_chains[2]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_extraword_mod3_1() return { return &mark_chains[3]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_extraword_mod3_2() return { return &mark_chains[4]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_intraword_mod3_0() return { return &mark_chains[5]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_intraword_mod3_1() return { return &mark_chains[6]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_intraword_mod3_2() return { return &mark_chains[7]; }
    MD_MARKCHAIN* UNDERSCORE_OPENERS() return { return &mark_chains[8]; }
    MD_MARKCHAIN* TILDE_OPENERS() return { return &mark_chains[9]; }
    MD_MARKCHAIN* BRACKET_OPENERS() return { return &mark_chains[10]; }
    MD_MARKCHAIN* DOLLAR_OPENERS() return { return &mark_chains[11]; }

    int n_table_cell_boundaries;

    /* For resolving links. */
    int unresolved_link_head;
    int unresolved_link_tail;

    /* For resolving raw HTML. */
    OFF html_comment_horizon;
    OFF html_proc_instr_horizon;
    OFF html_decl_horizon;
    OFF html_cdata_horizon;

    /* For block analysis.
     * Notes:
     *   -- It holds MD_BLOCK as well as MD_LINE structures. After each
     *      MD_BLOCK, its (multiple) MD_LINE(s) follow.
     *   -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used
     *      instead of MD_LINE(s).
     */
    void* block_bytes;
    MD_BLOCK* current_block;
    int n_block_bytes;
    int alloc_block_bytes;

    /* For container block analysis. */
    MD_CONTAINER* containers;
    int n_containers;
    int alloc_containers;

    /* Minimal indentation to call the block "indented code block". */
    uint code_indent_offset;

    /* Contextual info for line analysis. */
    SZ code_fence_length;   /* For checking closing fence length. */
    int html_block_type;    /* For checking closing raw HTML condition. */
    int last_line_has_list_loosening_effect;
    int last_list_item_starts_with_two_blank_lines;

    /* Forwards `msg` to the optional parser.debug_log callback; does nothing
     * when that callback is null. */
    void MD_LOG(const(char)* msg)
    {
        if(parser.debug_log != null)
            parser.debug_log(msg, userdata);
    }

    /* Character accessors. No bounds check is done on `off`. */
    CHAR CH(OFF off)
    {
        return text[off];
    }

    const(CHAR)* STR(OFF off)
    {
        return text + off;
    }

    /* Character classification of the input character at offset `off`;
     * each forwards CH(off) to the free function of the same name with '_' appended. */
    bool ISANYOF(OFF off, const(CHAR)* palette) { return ISANYOF_(CH(off), palette); }
    bool ISANYOF2(OFF off, CHAR ch1, CHAR ch2) { return ISANYOF2_(CH(off), ch1, ch2); }
    bool ISANYOF3(OFF off, CHAR ch1, CHAR ch2, CHAR ch3) { return ISANYOF3_(CH(off), ch1, ch2, ch3); }
    bool ISASCII(OFF off) { return ISASCII_(CH(off)); }
    bool ISBLANK(OFF off) { return ISBLANK_(CH(off)); }
    bool ISNEWLINE(OFF off) { return ISNEWLINE_(CH(off)); }
    bool ISWHITESPACE(OFF off) { return ISWHITESPACE_(CH(off)); }
    bool ISCNTRL(OFF off) { return ISCNTRL_(CH(off)); }
    bool ISPUNCT(OFF off) { return ISPUNCT_(CH(off)); }
    bool ISUPPER(OFF off) { return ISUPPER_(CH(off)); }
    bool ISLOWER(OFF off) { return ISLOWER_(CH(off)); }
    bool ISALPHA(OFF off) { return ISALPHA_(CH(off)); }
    bool ISDIGIT(OFF off) { return ISDIGIT_(CH(off)); }
    bool ISXDIGIT(OFF off) { return ISXDIGIT_(CH(off)); }
    bool ISALNUM(OFF off) { return ISALNUM_(CH(off)); }
}

/* Kind of a source line. */
alias MD_LINETYPE = int;
enum : MD_LINETYPE
{
    MD_LINE_BLANK,
    MD_LINE_HR,
    MD_LINE_ATXHEADER,
    MD_LINE_SETEXTHEADER,
    MD_LINE_SETEXTUNDERLINE,
    MD_LINE_INDENTEDCODE,
    MD_LINE_FENCEDCODE,
    MD_LINE_HTML,
    MD_LINE_TEXT,
    MD_LINE_TABLE,
    MD_LINE_TABLEUNDERLINE
}

/* Per-line analysis record. The line type and the extra data are stored in
 * 16 bits each; the setters below narrow the given value with a cast. */
struct MD_LINE_ANALYSIS
{
nothrow:
@nogc:
    short type_;
    ushort data_;

    MD_LINETYPE type() const
    {
        return type_;
    }

    void type(MD_LINETYPE value)
    {
        type_ = cast(short)value;
    }

    int data() const
    {
        return data_;
    }

    void data(uint value)
    {
        data_ = cast(ushort)value;
    }

    OFF beg;
    OFF end;
    uint indent; /* Indentation level.
*/
}

/* Offsets of the first and the one-past-last character of a line. */
struct MD_LINE
{
    OFF beg, end;
}

/* Same as MD_LINE, with an additional indentation value. */
struct MD_VERBATIMLINE
{
    OFF beg, end, indent;
}


/*****************
 ***  Helpers  ***
 *****************/

pure
{
    /* Character classification.
     * Note we assume ASCII compatibility of code points < 128 here. */

    /* True when ch_min <= ch <= ch_max. */
    bool ISIN_(CHAR ch, CHAR ch_min, CHAR ch_max)
    {
        const uint code = ch;
        return code >= ch_min && code <= ch_max;
    }

    /* True when ch occurs in the zero-terminated string `palette`. */
    bool ISANYOF_(CHAR ch, const(CHAR)* palette)
    {
        return md_strchr(palette, ch) !is null;
    }

    bool ISANYOF2_(CHAR ch, CHAR ch1, CHAR ch2)
    {
        if (ch == ch1)
            return true;
        return ch == ch2;
    }

    bool ISANYOF3_(CHAR ch, CHAR ch1, CHAR ch2, CHAR ch3)
    {
        if (ch == ch1 || ch == ch2)
            return true;
        return ch == ch3;
    }

    bool ISASCII_(CHAR ch)
    {
        return ch < 128;
    }

    /* Space or horizontal tab. */
    bool ISBLANK_(CHAR ch)
    {
        return ch == ' ' || ch == '\t';
    }

    /* Carriage return or line feed. */
    bool ISNEWLINE_(CHAR ch)
    {
        return ch == '\r' || ch == '\n';
    }

    /* Space, tab, vertical tab or form feed (newlines are not included). */
    bool ISWHITESPACE_(CHAR ch)
    {
        switch (ch)
        {
            case ' ':
            case '\t':
            case '\v':
            case '\f':
                return true;
            default:
                return false;
        }
    }

    /* Codes 0..31 and 127. */
    bool ISCNTRL_(CHAR ch)
    {
        return ch < 32 || ch == 127;
    }

    /* ASCII punctuation: codes 33..47, 58..64, 91..96 and 123..126. */
    bool ISPUNCT_(CHAR ch)
    {
        switch (ch)
        {
            case '!': .. case '/':   /* 33..47 */
            case ':': .. case '@':   /* 58..64 */
            case '[': .. case '`':   /* 91..96 */
            case '{': .. case '~':   /* 123..126 */
                return true;
            default:
                return false;
        }
    }

    bool ISUPPER_(CHAR ch)
    {
        return ch >= 'A' && ch <= 'Z';
    }

    bool ISLOWER_(CHAR ch)
    {
        return ch >= 'a' && ch <= 'z';
    }

    bool ISALPHA_(CHAR ch)
    {
        /* Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z' and maps no other
         * character into that range. */
        const uint folded = ch | 0x20;
        return folded >= 'a' && folded <= 'z';
    }

    bool ISDIGIT_(CHAR ch)
    {
        return ch >= '0' && ch <= '9';
    }

    bool ISXDIGIT_(CHAR ch)
    {
        switch (ch)
        {
            case '0': .. case '9':
            case 'A': .. case 'F':
            case 'a': .. case 'f':
                return true;
            default:
                return false;
        }
    }

    bool ISALNUM_(CHAR ch)
    {
        return ISDIGIT_(ch) || ISALPHA_(ch);
    }
}

/* Returns a pointer to the first occurrence of `ch` in the zero-terminated
 * string `str`, or null when there is none. The terminator itself is never
 * matched, so searching for '\0' yields null. */
const(CHAR)* md_strchr(const(CHAR)* str, CHAR ch) pure
{
    for (const(CHAR)* cursor = str; *cursor != '\0'; ++cursor)
    {
        if (*cursor == ch)
            return cursor;
    }
    return null;
}

/* Case insensitive check of string equality.
*/
/* Compares the first `n` characters of `s1` and `s2`, treating 'a'..'z' and
 * 'A'..'Z' as equal. Returns TRUE or FALSE. */
int md_ascii_case_eq(const(CHAR)* s1, const(CHAR)* s2, SZ n)
{
    OFF i;
    for(i = 0; i < n; i++) {
        CHAR ch1 = s1[i];
        CHAR ch2 = s2[i];

        /* Fold lower case to upper case before comparing. */
        if(ISLOWER_(ch1))
            ch1 += ('A'-'a');
        if(ISLOWER_(ch2))
            ch2 += ('A'-'a');
        if(ch1 != ch2)
            return FALSE;
    }
    return TRUE;
}

/* Exact comparison of the first `n` characters of `s1` and `s2`. */
int md_ascii_eq(const(CHAR)* s1, const(CHAR)* s2, SZ n)
{
    return memcmp(s1, s2, n * CHAR.sizeof) == 0;
}

/* Sends `str[0 .. size]` to the text callback, with every NUL character
 * reported separately as MD_TEXT_NULLCHAR and every run between NULs reported
 * with the given `type`.
 *
 * Returns 0 on success, or the first non-zero value returned by the callback.
 */
int md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const(CHAR)* str, SZ size)
{
    int ret = 0;

    while(size > 0) {
        /* Length of the leading run that contains no NUL. */
        OFF run = 0;
        while(run < size && str[run] != '\0')
            run++;

        if(run > 0) {
            ret = ctx.parser.text(type, str, run, ctx.userdata);
            if(ret != 0)
                return ret;

            str += run;
            size -= run;
        }

        if(size == 0)
            break;

        /* str[0] is a NUL here. Report it, then step over it so that it is
         * not sent again as part of the following text run. */
        ret = ctx.parser.text(MD_TEXT_NULLCHAR, "", 1, ctx.userdata);
        if(ret != 0)
            return ret;
        str++;
        size--;
    }

    return 0;
}

/* Makes sure ctx.buffer can hold at least `sz` characters, growing it when
 * needed. Returns 0 on success and -1 on failure; on failure the existing
 * buffer and ctx.alloc_buffer are left as they were. */
int MD_TEMP_BUFFER(MD_CTX* ctx, SZ sz)
{
    if(sz > ctx.alloc_buffer)
    {
        /* sz + sz/2 + 128 rounded down to a multiple of 128. Computed in
         * 64 bits: in 32 bits the sum can wrap for very large `sz` and yield
         * a buffer smaller than requested. */
        const ulong wanted = (cast(ulong) sz + sz / 2 + 128) & ~127UL;
        if (wanted > SZ.max)
        {
            ctx.MD_LOG("Temporary buffer size overflow.");
            return -1;
        }

        CHAR* new_buffer;
        SZ new_size = cast(SZ) wanted;
        new_buffer = cast(CHAR*) realloc_safe(ctx.buffer, new_size);
        if (new_buffer == null)
        {
            ctx.MD_LOG("realloc() failed.");
            return -1;
        }
        ctx.buffer = new_buffer;
        ctx.alloc_buffer = new_size;
    }
    return 0;
}

/* The five wrappers below call the respective MD_PARSER callback with
 * ctx.userdata, log through ctx.MD_LOG() when the callback returns non-zero,
 * and pass that return value on to the caller. */

int MD_ENTER_BLOCK(MD_CTX* ctx, MD_BLOCKTYPE type, void* arg)
{
    int ret = ctx.parser.enter_block(type, arg, ctx.userdata);
    if(ret != 0)
    {
        ctx.MD_LOG("Aborted from enter_block() callback.");
        return ret;
    }
    return 0;
}

int MD_LEAVE_BLOCK(MD_CTX* ctx, MD_BLOCKTYPE type, void* arg)
{
    int ret = ctx.parser.leave_block(type, arg, ctx.userdata);
    if(ret != 0)
    {
        ctx.MD_LOG("Aborted from leave_block() callback.");
        return ret;
    }
    return 0;
}

int MD_ENTER_SPAN(MD_CTX* ctx, MD_SPANTYPE type, void* arg)
{
    int ret = ctx.parser.enter_span(type, arg, ctx.userdata);
    if(ret != 0)
    {
        ctx.MD_LOG("Aborted from enter_span() callback.");
        return ret;
    }
    return 0;
}

int MD_LEAVE_SPAN(MD_CTX* ctx, MD_SPANTYPE type, void* arg)
{
    int ret = ctx.parser.leave_span(type, arg, ctx.userdata);
    if(ret != 0)
    {
        ctx.MD_LOG("Aborted from leave_span() callback.");
        return ret;
    }
    return 0;
}

/* Empty text (size == 0) is not reported at all. */
int MD_TEXT(MD_CTX* ctx, MD_TEXTTYPE type, const(MD_CHAR)* str, MD_SIZE size)
{
    if(size > 0)
    {
        int ret = ctx.parser.text((type), (str), (size), ctx.userdata);
        if (ret != 0)
        {
            ctx.MD_LOG("Aborted from text() callback.");
            return ret;
        }
    }
    return 0;
}

/* Like MD_TEXT(), but for text that may contain NUL characters: they are
 * reported as MD_TEXT_NULLCHAR via md_text_with_null_replacement(). */
int MD_TEXT_INSECURE(MD_CTX* ctx, MD_TEXTTYPE type, const(MD_CHAR)* str, MD_SIZE size)
{
    if(size > 0)
    {
        int ret = md_text_with_null_replacement(ctx, type, str, size);
        if(ret != 0)
        {
            ctx.MD_LOG("Aborted from text() callback.");
            return ret;
        }
    }
    return 0;
}

/*************************
 ***  Unicode Support  ***
 *************************/

/* Up to three codepoints, with the number of valid entries in n_codepoints. */
struct MD_UNICODE_FOLD_INFO
{
    uint[3] codepoints;
    int n_codepoints;
};



/* Binary search over sorted "map" of codepoints. Consecutive sequences
 * of codepoints may be encoded in the map by just using the
 * (MIN_CODEPOINT | 0x40000000) and (MAX_CODEPOINT | 0x80000000).
 *
 * Returns index of the found record in the map (in the case of ranges,
 * the minimal value is used); or -1 on failure. */
int md_unicode_bsearch__(uint codepoint, const(uint)* map, size_t map_size)
{
    int beg, end;
    int pivot_beg, pivot_end;

    beg = 0;
    end = cast(int) map_size-1;
    while(beg <= end) {
        /* Pivot may be a range, not just a single value. */
        pivot_beg = pivot_end = (beg + end) / 2;
        /* Landed on the start of a range: its end is the next entry. */
        if(map[pivot_end] & 0x40000000)
            pivot_end++;
        /* Landed on the end of a range: its start is the previous entry. */
        if(map[pivot_beg] & 0x80000000)
            pivot_beg--;

        /* The mask strips the range marker bits before comparing. */
        if(codepoint < (map[pivot_beg] & 0x00ffffff))
            end = pivot_beg - 1;
        else if(codepoint > (map[pivot_end] & 0x00ffffff))
            beg = pivot_end + 1;
        else
            return pivot_beg;
    }

    return -1;
}

bool md_is_unicode_whitespace__(uint codepoint)
{
    /* Unicode "Zs" category.
+ * (generated by scripts/build_whitespace_map.py) */ + static immutable uint[] WHITESPACE_MAP = + [ + 0x0020, 0x00a0, 0x1680, 0x2000| 0x40000000, 0x200a | 0x80000000, 0x202f, 0x205f, 0x3000 + ]; + + /* The ASCII ones are the most frequently used ones, also CommonMark + * specification requests few more in this range. */ + if(codepoint <= 0x7f) + return ISWHITESPACE_(cast(CHAR)codepoint); + + return (md_unicode_bsearch__(codepoint, WHITESPACE_MAP.ptr, WHITESPACE_MAP.length) >= 0); +} + +bool md_is_unicode_punct__(uint codepoint) +{ + /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories. + * (generated by scripts/build_punct_map.py) */ + static immutable uint[] PUNCT_MAP = + [ + 0x0021 | 0x40000000,0x0023 | 0x80000000, 0x0025 | 0x40000000,0x002a | 0x80000000, 0x002c | 0x40000000,0x002f | 0x80000000, 0x003a | 0x40000000,0x003b | 0x80000000, 0x003f | 0x40000000,0x0040 | 0x80000000, + 0x005b | 0x40000000,0x005d | 0x80000000, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00a7, 0x00ab, 0x00b6 | 0x40000000,0x00b7 | 0x80000000, + 0x00bb, 0x00bf, 0x037e, 0x0387, 0x055a | 0x40000000,0x055f | 0x80000000, 0x0589 | 0x40000000,0x058a | 0x80000000, 0x05be, 0x05c0, + 0x05c3, 0x05c6, 0x05f3 | 0x40000000,0x05f4 | 0x80000000, 0x0609 | 0x40000000,0x060a | 0x80000000, 0x060c | 0x40000000,0x060d | 0x80000000, 0x061b, 0x061e | 0x40000000,0x061f | 0x80000000, + 0x066a | 0x40000000,0x066d | 0x80000000, 0x06d4, 0x0700 | 0x40000000,0x070d | 0x80000000, 0x07f7 | 0x40000000,0x07f9 | 0x80000000, 0x0830 | 0x40000000,0x083e | 0x80000000, 0x085e, + 0x0964 | 0x40000000,0x0965 | 0x80000000, 0x0970, 0x09fd, 0x0a76, 0x0af0, 0x0c77, 0x0c84, 0x0df4, 0x0e4f, + 0x0e5a | 0x40000000,0x0e5b | 0x80000000, 0x0f04 | 0x40000000,0x0f12 | 0x80000000, 0x0f14, 0x0f3a | 0x40000000,0x0f3d | 0x80000000, 0x0f85, 0x0fd0 | 0x40000000,0x0fd4 | 0x80000000, + 0x0fd9 | 0x40000000,0x0fda | 0x80000000, 0x104a | 0x40000000,0x104f | 0x80000000, 0x10fb, 0x1360 | 0x40000000,0x1368 | 0x80000000, 0x1400, 0x166e, 0x169b | 
0x40000000,0x169c | 0x80000000, + 0x16eb | 0x40000000,0x16ed | 0x80000000, 0x1735 | 0x40000000,0x1736 | 0x80000000, 0x17d4 | 0x40000000,0x17d6 | 0x80000000, 0x17d8 | 0x40000000,0x17da | 0x80000000, 0x1800 | 0x40000000,0x180a | 0x80000000, + 0x1944 | 0x40000000,0x1945 | 0x80000000, 0x1a1e | 0x40000000,0x1a1f | 0x80000000, 0x1aa0 | 0x40000000,0x1aa6 | 0x80000000, 0x1aa8 | 0x40000000,0x1aad | 0x80000000, 0x1b5a | 0x40000000,0x1b60 | 0x80000000, + 0x1bfc | 0x40000000,0x1bff | 0x80000000, 0x1c3b | 0x40000000,0x1c3f | 0x80000000, 0x1c7e | 0x40000000,0x1c7f | 0x80000000, 0x1cc0 | 0x40000000,0x1cc7 | 0x80000000, 0x1cd3, 0x2010 | 0x40000000,0x2027 | 0x80000000, + 0x2030 | 0x40000000,0x2043 | 0x80000000, 0x2045 | 0x40000000,0x2051 | 0x80000000, 0x2053 | 0x40000000,0x205e | 0x80000000, 0x207d | 0x40000000,0x207e | 0x80000000, 0x208d | 0x40000000,0x208e | 0x80000000, + 0x2308 | 0x40000000,0x230b | 0x80000000, 0x2329 | 0x40000000,0x232a | 0x80000000, 0x2768 | 0x40000000,0x2775 | 0x80000000, 0x27c5 | 0x40000000,0x27c6 | 0x80000000, 0x27e6 | 0x40000000,0x27ef | 0x80000000, + 0x2983 | 0x40000000,0x2998 | 0x80000000, 0x29d8 | 0x40000000,0x29db | 0x80000000, 0x29fc | 0x40000000,0x29fd | 0x80000000, 0x2cf9 | 0x40000000,0x2cfc | 0x80000000, 0x2cfe | 0x40000000,0x2cff | 0x80000000, 0x2d70, + 0x2e00 | 0x40000000,0x2e2e | 0x80000000, 0x2e30 | 0x40000000,0x2e4f | 0x80000000, 0x3001 | 0x40000000,0x3003 | 0x80000000, 0x3008 | 0x40000000,0x3011 | 0x80000000, 0x3014 | 0x40000000,0x301f | 0x80000000, 0x3030, + 0x303d, 0x30a0, 0x30fb, 0xa4fe | 0x40000000,0xa4ff | 0x80000000, 0xa60d | 0x40000000,0xa60f | 0x80000000, 0xa673, 0xa67e, + 0xa6f2 | 0x40000000,0xa6f7 | 0x80000000, 0xa874 | 0x40000000,0xa877 | 0x80000000, 0xa8ce | 0x40000000,0xa8cf | 0x80000000, 0xa8f8 | 0x40000000,0xa8fa | 0x80000000, 0xa8fc, 0xa92e | 0x40000000,0xa92f | 0x80000000, + 0xa95f, 0xa9c1 | 0x40000000,0xa9cd | 0x80000000, 0xa9de | 0x40000000,0xa9df | 0x80000000, 0xaa5c | 0x40000000,0xaa5f | 0x80000000, 0xaade | 
0x40000000,0xaadf | 0x80000000, 0xaaf0 | 0x40000000,0xaaf1 | 0x80000000, + 0xabeb, 0xfd3e | 0x40000000,0xfd3f | 0x80000000, 0xfe10 | 0x40000000,0xfe19 | 0x80000000, 0xfe30 | 0x40000000,0xfe52 | 0x80000000, 0xfe54 | 0x40000000,0xfe61 | 0x80000000, 0xfe63, 0xfe68, + 0xfe6a | 0x40000000,0xfe6b | 0x80000000, 0xff01 | 0x40000000,0xff03 | 0x80000000, 0xff05 | 0x40000000,0xff0a | 0x80000000, 0xff0c | 0x40000000,0xff0f | 0x80000000, 0xff1a | 0x40000000,0xff1b | 0x80000000, + 0xff1f | 0x40000000,0xff20 | 0x80000000, 0xff3b | 0x40000000,0xff3d | 0x80000000, 0xff3f, 0xff5b, 0xff5d, 0xff5f | 0x40000000,0xff65 | 0x80000000, 0x10100 | 0x40000000,0x10102 | 0x80000000, + 0x1039f, 0x103d0, 0x1056f, 0x10857, 0x1091f, 0x1093f, 0x10a50 | 0x40000000,0x10a58 | 0x80000000, 0x10a7f, + 0x10af0 | 0x40000000,0x10af6 | 0x80000000, 0x10b39 | 0x40000000,0x10b3f | 0x80000000, 0x10b99 | 0x40000000,0x10b9c | 0x80000000, 0x10f55 | 0x40000000,0x10f59 | 0x80000000, 0x11047 | 0x40000000,0x1104d | 0x80000000, + 0x110bb | 0x40000000,0x110bc | 0x80000000, 0x110be | 0x40000000,0x110c1 | 0x80000000, 0x11140 | 0x40000000,0x11143 | 0x80000000, 0x11174 | 0x40000000,0x11175 | 0x80000000, 0x111c5 | 0x40000000,0x111c8 | 0x80000000, + 0x111cd, 0x111db, 0x111dd | 0x40000000,0x111df | 0x80000000, 0x11238 | 0x40000000,0x1123d | 0x80000000, 0x112a9, 0x1144b | 0x40000000,0x1144f | 0x80000000, + 0x1145b, 0x1145d, 0x114c6, 0x115c1 | 0x40000000,0x115d7 | 0x80000000, 0x11641 | 0x40000000,0x11643 | 0x80000000, 0x11660 | 0x40000000,0x1166c | 0x80000000, + 0x1173c | 0x40000000,0x1173e | 0x80000000, 0x1183b, 0x119e2, 0x11a3f | 0x40000000,0x11a46 | 0x80000000, 0x11a9a | 0x40000000,0x11a9c | 0x80000000, 0x11a9e | 0x40000000,0x11aa2 | 0x80000000, + 0x11c41 | 0x40000000,0x11c45 | 0x80000000, 0x11c70 | 0x40000000,0x11c71 | 0x80000000, 0x11ef7 | 0x40000000,0x11ef8 | 0x80000000, 0x11fff, 0x12470 | 0x40000000,0x12474 | 0x80000000, + 0x16a6e | 0x40000000,0x16a6f | 0x80000000, 0x16af5, 0x16b37 | 0x40000000,0x16b3b | 0x80000000, 
0x16b44, 0x16e97 | 0x40000000,0x16e9a | 0x80000000, 0x16fe2, + 0x1bc9f, 0x1da87 | 0x40000000,0x1da8b | 0x80000000, 0x1e95e | 0x40000000,0x1e95f | 0x80000000 + ]; + + /* The ASCII ones are the most frequently used ones, also CommonMark + * specification requests few more in this range. */ + if(codepoint <= 0x7f) + return ISPUNCT_(cast(CHAR)codepoint); + + return (md_unicode_bsearch__(codepoint, PUNCT_MAP.ptr, PUNCT_MAP.length) >= 0); +} + +void md_get_unicode_fold_info(uint codepoint, MD_UNICODE_FOLD_INFO* info) +{ + /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories. + * (generated by scripts/build_punct_map.py) */ + static immutable uint[] FOLD_MAP_1 = + [ + 0x0041 | 0x40000000, 0x005a | 0x80000000, 0x00b5, 0x00c0 | 0x40000000, 0x00d6 | 0x80000000, 0x00d8 | 0x40000000, 0x00de | 0x80000000, 0x0100 | 0x40000000, 0x012e | 0x80000000, 0x0132 | 0x40000000, 0x0136 | 0x80000000, + 0x0139 | 0x40000000, 0x0147 | 0x80000000, 0x014a | 0x40000000, 0x0176 | 0x80000000, 0x0178, 0x0179 | 0x40000000, 0x017d | 0x80000000, 0x017f, 0x0181, 0x0182, + 0x0186, 0x0187, 0x0189, 0x018b, 0x018e, 0x018f, 0x0190, 0x0191, 0x0193, + 0x0194, 0x0196, 0x0197, 0x0198, 0x019c, 0x019d, 0x019f, 0x01a0 | 0x40000000, 0x01a4 | 0x80000000, 0x01a6, + 0x01a7, 0x01a9, 0x01ac, 0x01ae, 0x01af, 0x01b1, 0x01b3, 0x01b7, 0x01b8, + 0x01bc, 0x01c4, 0x01c5, 0x01c7, 0x01c8, 0x01ca, 0x01cb | 0x40000000, 0x01db | 0x80000000, 0x01de | 0x40000000, 0x01ee | 0x80000000, + 0x01f1, 0x01f2, 0x01f6, 0x01f7, 0x01f8 | 0x40000000, 0x021e | 0x80000000, 0x0220, 0x0222 | 0x40000000, 0x0232 | 0x80000000, 0x023a, + 0x023b, 0x023d, 0x023e, 0x0241, 0x0243, 0x0244, 0x0245, 0x0246 | 0x40000000, 0x024e | 0x80000000, 0x0345, + 0x0370, 0x0376, 0x037f, 0x0386, 0x0388 | 0x40000000, 0x038a | 0x80000000, 0x038c, 0x038e, 0x0391 | 0x40000000, 0x03a1 | 0x80000000, + 0x03a3 | 0x40000000, 0x03ab | 0x80000000, 0x03c2, 0x03cf, 0x03d0, 0x03d1, 0x03d5, 0x03d6, 0x03d8 | 0x40000000, 0x03ee | 0x80000000, + 0x03f0, 0x03f1, 0x03f4, 0x03f5, 0x03f7, 
0x03f9, 0x03fa, 0x03fd | 0x40000000, 0x03ff | 0x80000000, + 0x0400 | 0x40000000, 0x040f | 0x80000000, 0x0410 | 0x40000000, 0x042f | 0x80000000, 0x0460 | 0x40000000, 0x0480 | 0x80000000, 0x048a | 0x40000000, 0x04be | 0x80000000, 0x04c0, 0x04c1 | 0x40000000, 0x04cd | 0x80000000, + 0x04d0 | 0x40000000, 0x052e | 0x80000000, 0x0531 | 0x40000000, 0x0556 | 0x80000000, 0x10a0 | 0x40000000, 0x10c5 | 0x80000000, 0x10c7, 0x10cd, 0x13f8 | 0x40000000, 0x13fd | 0x80000000, 0x1c80, + 0x1c81, 0x1c82, 0x1c83, 0x1c85, 0x1c86, 0x1c87, 0x1c88, 0x1c90 | 0x40000000, 0x1cba | 0x80000000, + 0x1cbd | 0x40000000, 0x1cbf | 0x80000000, 0x1e00 | 0x40000000, 0x1e94 | 0x80000000, 0x1e9b, 0x1ea0 | 0x40000000, 0x1efe | 0x80000000, 0x1f08 | 0x40000000, 0x1f0f | 0x80000000, 0x1f18 | 0x40000000, 0x1f1d | 0x80000000, + 0x1f28 | 0x40000000, 0x1f2f | 0x80000000, 0x1f38 | 0x40000000, 0x1f3f | 0x80000000, 0x1f48 | 0x40000000, 0x1f4d | 0x80000000, 0x1f59, 0x1f5b, 0x1f5d, 0x1f5f, + 0x1f68 | 0x40000000, 0x1f6f | 0x80000000, 0x1fb8, 0x1fba, 0x1fbe, 0x1fc8 | 0x40000000, 0x1fcb | 0x80000000, 0x1fd8, 0x1fda, 0x1fe8, + 0x1fea, 0x1fec, 0x1ff8, 0x1ffa, 0x2126, 0x212a, 0x212b, 0x2132, 0x2160 | 0x40000000, 0x216f | 0x80000000, + 0x2183, 0x24b6 | 0x40000000, 0x24cf | 0x80000000, 0x2c00 | 0x40000000, 0x2c2e | 0x80000000, 0x2c60, 0x2c62, 0x2c63, 0x2c64, + 0x2c67 | 0x40000000, 0x2c6b | 0x80000000, 0x2c6d, 0x2c6e, 0x2c6f, 0x2c70, 0x2c72, 0x2c75, 0x2c7e, + 0x2c80 | 0x40000000, 0x2ce2 | 0x80000000, 0x2ceb, 0x2cf2, 0xa640 | 0x40000000, 0xa66c | 0x80000000, 0xa680 | 0x40000000, 0xa69a | 0x80000000, 0xa722 | 0x40000000, 0xa72e | 0x80000000, + 0xa732 | 0x40000000, 0xa76e | 0x80000000, 0xa779, 0xa77d, 0xa77e | 0x40000000, 0xa786 | 0x80000000, 0xa78b, 0xa78d, 0xa790, + 0xa796 | 0x40000000, 0xa7a8 | 0x80000000, 0xa7aa, 0xa7ab, 0xa7ac, 0xa7ad, 0xa7ae, 0xa7b0, 0xa7b1, 0xa7b2, + 0xa7b3, 0xa7b4 | 0x40000000, 0xa7be | 0x80000000, 0xa7c2, 0xa7c4, 0xa7c5, 0xa7c6, 0xab70 | 0x40000000, 0xabbf | 0x80000000, + 0xff21 | 0x40000000, 0xff3a | 
0x80000000, 0x10400 | 0x40000000, 0x10427 | 0x80000000, 0x104b0 | 0x40000000, 0x104d3 | 0x80000000, 0x10c80 | 0x40000000, 0x10cb2 | 0x80000000, 0x118a0 | 0x40000000, 0x118bf | 0x80000000, + 0x16e40 | 0x40000000, 0x16e5f | 0x80000000, 0x1e900 | 0x40000000, 0x1e921 | 0x80000000 + ]; + + static immutable uint[] FOLD_MAP_1_DATA = + [ + 0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148, + 0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0254, 0x0188, 0x0256, 0x018c, 0x01dd, + 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275, 0x01a1, 0x01a5, + 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x01b4, 0x0292, 0x01b9, 0x01bd, 0x01c6, 0x01c6, + 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3, 0x0195, 0x01bf, 0x01f9, 0x021f, + 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242, 0x0180, 0x0289, 0x028c, 0x0247, 0x024f, + 0x03b9, 0x0371, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af, 0x03cc, 0x03cd, 0x03b1, 0x03c1, 0x03c3, 0x03cb, + 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0, 0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, + 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f, 0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, + 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586, 0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, + 0x0434, 0x043e, 0x0441, 0x0442, 0x044a, 0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, + 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07, 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, + 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60, 0x1f67, 0x1fb0, 0x1f70, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1f76, + 0x1fe0, 0x1f7a, 0x1fe5, 0x1f78, 0x1f7c, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170, 0x217f, 0x2184, 0x24d0, + 0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251, 0x0271, 0x0250, 0x0252, + 0x2c73, 0x2c76, 0x023f, 0x2c81, 0x2ce3, 0x2cec, 0x2cf3, 0xa641, 0xa66d, 
0xa681, 0xa69b, 0xa723, 0xa72f, + 0xa733, 0xa76f, 0xa77a, 0x1d79, 0xa77f, 0xa787, 0xa78c, 0x0265, 0xa791, 0xa797, 0xa7a9, 0x0266, 0x025c, + 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d, 0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e, + 0x13a0, 0x13ef, 0xff41, 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, + 0x16e60, 0x16e7f, 0x1e922, 0x1e943 + ]; + + static immutable uint[] FOLD_MAP_2 = + [ + 0x00df, 0x0130, 0x0149, 0x01f0, 0x0587, 0x1e96, 0x1e97, 0x1e98, 0x1e99, + 0x1e9a, 0x1e9e, 0x1f50, 0x1f80 | 0x40000000, 0x1f87 | 0x80000000, 0x1f88 | 0x40000000, 0x1f8f | 0x80000000, 0x1f90 | 0x40000000, 0x1f97 | 0x80000000, 0x1f98 | 0x40000000, 0x1f9f | 0x80000000, + 0x1fa0 | 0x40000000, 0x1fa7 | 0x80000000, 0x1fa8 | 0x40000000, 0x1faf | 0x80000000, 0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fbc, 0x1fc2, + 0x1fc3, 0x1fc4, 0x1fc6, 0x1fcc, 0x1fd6, 0x1fe4, 0x1fe6, 0x1ff2, 0x1ff3, + 0x1ff4, 0x1ff6, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb05, 0xfb06, 0xfb13, + 0xfb14, 0xfb15, 0xfb16, 0xfb17 + ]; + + static immutable uint[] FOLD_MAP_2_DATA = + [ + 0x0073,0x0073, 0x0069,0x0307, 0x02bc,0x006e, 0x006a,0x030c, 0x0565,0x0582, 0x0068,0x0331, 0x0074,0x0308, + 0x0077,0x030a, 0x0079,0x030a, 0x0061,0x02be, 0x0073,0x0073, 0x03c5,0x0313, 0x1f00,0x03b9, 0x1f07,0x03b9, + 0x1f00,0x03b9, 0x1f07,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f60,0x03b9, + 0x1f67,0x03b9, 0x1f60,0x03b9, 0x1f67,0x03b9, 0x1f70,0x03b9, 0x03b1,0x03b9, 0x03ac,0x03b9, 0x03b1,0x0342, + 0x03b1,0x03b9, 0x1f74,0x03b9, 0x03b7,0x03b9, 0x03ae,0x03b9, 0x03b7,0x0342, 0x03b7,0x03b9, 0x03b9,0x0342, + 0x03c1,0x0313, 0x03c5,0x0342, 0x1f7c,0x03b9, 0x03c9,0x03b9, 0x03ce,0x03b9, 0x03c9,0x0342, 0x03c9,0x03b9, + 0x0066,0x0066, 0x0066,0x0069, 0x0066,0x006c, 0x0073,0x0074, 0x0073,0x0074, 0x0574,0x0576, 0x0574,0x0565, + 0x0574,0x056b, 0x057e,0x0576, 0x0574,0x056d + ]; + + static immutable uint[] FOLD_MAP_3 = + [ + 0x0390, 0x03b0, 0x1f52, 0x1f54, 0x1f56, 0x1fb7, 0x1fc7, 0x1fd2, 0x1fd3, + 
0x1fd7, 0x1fe2, 0x1fe3, 0x1fe7, 0x1ff7, 0xfb03, 0xfb04 + ]; + + static immutable uint[] FOLD_MAP_3_DATA = + [ + 0x03b9,0x0308,0x0301, 0x03c5,0x0308,0x0301, 0x03c5,0x0313,0x0300, 0x03c5,0x0313,0x0301, + 0x03c5,0x0313,0x0342, 0x03b1,0x0342,0x03b9, 0x03b7,0x0342,0x03b9, 0x03b9,0x0308,0x0300, + 0x03b9,0x0308,0x0301, 0x03b9,0x0308,0x0342, 0x03c5,0x0308,0x0300, 0x03c5,0x0308,0x0301, + 0x03c5,0x0308,0x0342, 0x03c9,0x0342,0x03b9, 0x0066,0x0066,0x0069, 0x0066,0x0066,0x006c + ]; + + static struct FOLD_MAP + { + const(uint)* map; + const(uint)* data; + size_t map_size; + int n_codepoints; + } + + /*static immutable*/ FOLD_MAP[3] FOLD_MAP_LIST = + [ + FOLD_MAP(FOLD_MAP_1.ptr, FOLD_MAP_1_DATA.ptr, FOLD_MAP_1.length, 1), + FOLD_MAP(FOLD_MAP_2.ptr, FOLD_MAP_2_DATA.ptr, FOLD_MAP_2.length, 2), + FOLD_MAP(FOLD_MAP_3.ptr, FOLD_MAP_3_DATA.ptr, FOLD_MAP_3.length, 3), + ]; + + int i; + + /* Fast path for ASCII characters. */ + if(codepoint <= 0x7f) { + info.codepoints[0] = codepoint; + if(ISUPPER_(cast(CHAR)codepoint)) + info.codepoints[0] += 'a' - 'A'; + info.n_codepoints = 1; + return; + } + + /* Try to locate the codepoint in any of the maps. */ + for(i = 0; i < cast(int) (FOLD_MAP_LIST.length); i++) { + int index; + + index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size); + if(index >= 0) { + /* Found the mapping. */ + int n_codepoints = FOLD_MAP_LIST[i].n_codepoints; + const uint* map = FOLD_MAP_LIST[i].map; + const uint* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints); + + memcpy(info.codepoints.ptr, codepoints, uint.sizeof * n_codepoints); + info.n_codepoints = n_codepoints; + + if(FOLD_MAP_LIST[i].map[index] != codepoint) { + /* The found mapping maps whole range of codepoints, + * i.e. we have to offset info.codepoints[0] accordingly. */ + if((map[index] & 0x00ffffff)+1 == codepoints[0]) { + /* Alternating type of the range. */ + info.codepoints[0] = codepoint + ((codepoint & 0x1) == (map[index] & 0x1) ? 
1 : 0);
                } else {
                    /* Range to range kind of mapping. */
                    info.codepoints[0] += (codepoint - (map[index] & 0x00ffffff));
                }
            }

            return;
        }
    }

    /* No mapping found. Map the codepoint to itself. */
    info.codepoints[0] = codepoint;
    info.n_codepoints = 1;
}


/* UTF-8 byte classification. Every helper below looks only at the bit
 * pattern of the single byte it is given. */

/* Byte in range 0x00..0x7f, i.e. a complete one-byte sequence. */
bool IS_UTF8_LEAD1(CHAR ch)
{
    const ubyte octet = cast(ubyte) ch;
    return octet <= 0x7f;
}

/* Byte of the form 110xxxxx: leads a two-byte sequence. */
bool IS_UTF8_LEAD2(CHAR ch)
{
    const ubyte octet = cast(ubyte) ch;
    return (octet & 0xe0) == 0xc0;
}

/* Byte of the form 1110xxxx: leads a three-byte sequence. */
bool IS_UTF8_LEAD3(CHAR ch)
{
    const ubyte octet = cast(ubyte) ch;
    return (octet & 0xf0) == 0xe0;
}

/* Byte of the form 11110xxx: leads a four-byte sequence. */
bool IS_UTF8_LEAD4(CHAR ch)
{
    const ubyte octet = cast(ubyte) ch;
    return (octet & 0xf8) == 0xf0;
}

/* Byte of the form 10xxxxxx: continuation byte. */
bool IS_UTF8_TAIL(CHAR ch)
{
    const ubyte octet = cast(ubyte) ch;
    return (octet & 0xc0) == 0x80;
}

/* Decode the code point which starts at str[0].
 *
 * 'str_size' is the number of CHARs available at 'str'; str[0] itself is
 * read unconditionally. If 'p_size' is not null, it receives the number of
 * CHARs the decoded character occupies.
 *
 * When str[0] is a lead byte which is not followed by the required
 * continuation bytes (or the input is too short), or when it is not a lead
 * byte at all, the byte is returned as is and reported as 1 CHAR long.
 */
uint md_decode_utf8__(const(CHAR)* str, SZ str_size, SZ* p_size)
{
    const CHAR lead = str[0];

    /* Fallback result: the first byte alone. */
    SZ consumed = 1;
    uint decoded = cast(uint) lead;

    if(IS_UTF8_LEAD2(lead)) {
        if(str_size > 1 && IS_UTF8_TAIL(str[1])) {
            consumed = 2;
            decoded = ((cast(uint)lead & 0x1f) << 6) |
                      ((cast(uint)str[1] & 0x3f) << 0);
        }
    } else if(IS_UTF8_LEAD3(lead)) {
        if(str_size > 2 && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2])) {
            consumed = 3;
            decoded = ((cast(uint)lead & 0x0f) << 12) |
                      ((cast(uint)str[1] & 0x3f) << 6) |
                      ((cast(uint)str[2] & 0x3f) << 0);
        }
    } else if(IS_UTF8_LEAD4(lead)) {
        if(str_size > 3 && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2]) && IS_UTF8_TAIL(str[3])) {
            consumed = 4;
            decoded = ((cast(uint)lead & 0x07) << 18) |
                      ((cast(uint)str[1] & 0x3f) << 12) |
                      ((cast(uint)str[2] & 0x3f) << 6) |
                      ((cast(uint)str[3] & 0x3f) << 0);
        }
    }

    if(p_size != null)
        *p_size = consumed;
    return decoded;
}

/* Decode the code point which ends just before offset 'off' of the document.
 *
 * The CHAR at off-1 is read unconditionally, so 'off' has to be at least 1.
 * If the preceding bytes do not form a complete 2, 3 or 4 byte sequence, the
 * CHAR at off-1 is returned as is.
 */
uint md_decode_utf8_before__(MD_CTX* ctx, OFF off)
{
    const CHAR last = ctx.CH(off-1);

    /* Every multi-byte sequence ends with a continuation byte, so anything
     * else (ASCII included) can only stand for itself. */
    if(IS_UTF8_TAIL(last)) {
        if(off > 1 && IS_UTF8_LEAD2(ctx.CH(off-2)))
            return ((cast(uint)ctx.CH(off-2) & 0x1f) << 6) |
                   ((cast(uint)last & 0x3f) << 0);

        if(off > 2 && IS_UTF8_LEAD3(ctx.CH(off-3)) && IS_UTF8_TAIL(ctx.CH(off-2)))
            return ((cast(uint)ctx.CH(off-3) & 0x0f) << 12) |
                   ((cast(uint)ctx.CH(off-2) & 0x3f) << 6) |
                   ((cast(uint)last & 0x3f) << 0);

        if(off > 3 && IS_UTF8_LEAD4(ctx.CH(off-4)) && IS_UTF8_TAIL(ctx.CH(off-3)) && IS_UTF8_TAIL(ctx.CH(off-2)))
            return ((cast(uint)ctx.CH(off-4) & 0x07) << 18) |
                   ((cast(uint)ctx.CH(off-3) & 0x3f) << 12) |
                   ((cast(uint)ctx.CH(off-2) & 0x3f) << 6) |
                   ((cast(uint)last & 0x3f) << 0);
    }

    return cast(uint) last;
}

/* Is the given code point a Unicode whitespace? */
bool ISUNICODEWHITESPACE_(uint codepoint)
{
    const bool is_ws = md_is_unicode_whitespace__(codepoint);
    return is_ws;
}

/* Is the character starting at document offset 'off' a Unicode whitespace? */
bool ISUNICODEWHITESPACE(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8__(ctx.STR(off), ctx.size - (off), null);
    return md_is_unicode_whitespace__(codepoint);
}

/* Is the character ending just before document offset 'off' a Unicode
 * whitespace? */
bool ISUNICODEWHITESPACEBEFORE(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8_before__(ctx, off);
    return md_is_unicode_whitespace__(codepoint);
}

/* Is the character starting at document offset 'off' a Unicode punctuation? */
bool ISUNICODEPUNCT(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8__(ctx.STR(off), ctx.size - (off), null);
    return md_is_unicode_punct__(codepoint);
}

/* Is the character ending just before document offset 'off' a Unicode
 * punctuation? */
bool ISUNICODEPUNCTBEFORE(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8_before__(ctx, off);
    return md_is_unicode_punct__(codepoint);
}

/* Decode the code point at str[off] of a string which is 'str_size' CHARs
 * long. See md_decode_utf8__() for the meaning of the result and of
 * 'p_char_size'. */
uint md_decode_unicode(const(CHAR)* str, OFF off, SZ str_size, SZ* p_char_size)
{
    const(CHAR)* start = str + off;
    const SZ remaining = str_size - off;
    return md_decode_utf8__(start, remaining, p_char_size);
}

/*************************************
 ***  Helper string manipulations  ***
 *************************************/

/* Fill buffer with copy of the string between 'beg' and 'end' but replace any
 * line breaks with given replacement character.
 *
 * NOTE: Caller is responsible to make sure the buffer is large enough.
 * (Given the output is always shorter than input, (end - beg) is good idea
 * what the caller should allocate.)
+ */ +void md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const(MD_LINE)* lines, int n_lines, + CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size) +{ + CHAR* ptr = buffer; + int line_index = 0; + OFF off = beg; + + while(1) { + const MD_LINE* line = &lines[line_index]; + OFF line_end = line.end; + if(end < line_end) + line_end = end; + + while(off < line_end) { + *ptr = ctx.CH(off); + ptr++; + off++; + } + + if(off >= end) { + *p_size = cast(uint)(ptr - buffer); + return; + } + + *ptr = line_break_replacement_char; + ptr++; + + line_index++; + off = lines[line_index].beg; + } +} + +/* Wrapper of md_merge_lines() which allocates new buffer for the output string. + */ +int md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const(MD_LINE)* lines, int n_lines, + CHAR line_break_replacement_char, const(CHAR)** p_str, SZ* p_size) +{ + CHAR* buffer; + + buffer = cast(CHAR*) malloc(CHAR.sizeof * (end - beg)); + if(buffer == null) { + ctx.MD_LOG("malloc() failed."); + return -1; + } + + md_merge_lines(ctx, beg, end, lines, n_lines, + line_break_replacement_char, buffer, p_size); + + *p_str = buffer; + return 0; +} + +OFF md_skip_unicode_whitespace(const(CHAR)* label, OFF off, SZ size) +{ + SZ char_size; + uint codepoint; + + while(off < size) { + codepoint = md_decode_unicode(label, off, size, &char_size); + if(!ISUNICODEWHITESPACE_(codepoint) && !ISNEWLINE_(label[off])) + break; + off += char_size; + } + + return off; +} + + +/****************************** + *** Recognizing raw HTML *** + ******************************/ + +/* md_is_html_tag() may be called when processing inlines (inline raw HTML) + * or when breaking document to blocks (checking for start of HTML block type 7). + * + * When breaking document to blocks, we do not yet know line boundaries, but + * in that case the whole tag has to live on a single line. We distinguish this + * by n_lines == 0. 
+ */ +int md_is_html_tag(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + int attr_state; + OFF off = beg; + OFF line_end = (n_lines > 0) ? lines[0].end : ctx.size; + int i = 0; + + assert(ctx.CH(beg) == '<'); + + if(off + 1 >= line_end) + return FALSE; + off++; + + /* For parsing attributes, we need a little state automaton below. + * State -1: no attributes are allowed. + * State 0: attribute could follow after some whitespace. + * State 1: after a whitespace (attribute name may follow). + * State 2: after attribute name ('=' MAY follow). + * State 3: after '=' (value specification MUST follow). + * State 41: in middle of unquoted attribute value. + * State 42: in middle of single-quoted attribute value. + * State 43: in middle of double-quoted attribute value. + */ + attr_state = 0; + + if(ctx.CH(off) == '/') { + /* Closer tag "". No attributes may be present. */ + attr_state = -1; + off++; + } + + /* Tag name */ + if(off >= line_end || !ctx.ISALPHA(off)) + return FALSE; + off++; + while(off < line_end && (ctx.ISALNUM(off) || ctx.CH(off) == '-')) + off++; + + /* (Optional) attributes (if not closer), (optional) '/' (if not closer) + * and final '>'. */ + while(1) { + while(off < line_end && !ctx.ISNEWLINE(off)) { + if(attr_state > 40) { + if(attr_state == 41 && (ctx.ISBLANK(off) || ctx.ISANYOF(off, "\"'=<>`"))) { + attr_state = 0; + off--; /* Put the char back for re-inspection in the new state. */ + } else if(attr_state == 42 && ctx.CH(off) == '\'') { + attr_state = 0; + } else if(attr_state == 43 && ctx.CH(off) == '"') { + attr_state = 0; + } + off++; + } else if(ctx.ISWHITESPACE(off)) { + if(attr_state == 0) + attr_state = 1; + off++; + } else if(attr_state <= 2 && ctx.CH(off) == '>') { + /* End. 
*/ + goto done; + } else if(attr_state <= 2 && ctx.CH(off) == '/' && off+1 < line_end && ctx.CH(off+1) == '>') { + /* End with digraph '/>' */ + off++; + goto done; + } else if((attr_state == 1 || attr_state == 2) && (ctx.ISALPHA(off) || ctx.CH(off) == '_' || ctx.CH(off) == ':')) { + off++; + /* Attribute name */ + while(off < line_end && (ctx.ISALNUM(off) || ctx.ISANYOF(off, "_.:-"))) + off++; + attr_state = 2; + } else if(attr_state == 2 && ctx.CH(off) == '=') { + /* Attribute assignment sign */ + off++; + attr_state = 3; + } else if(attr_state == 3) { + /* Expecting start of attribute value. */ + if(ctx.CH(off) == '"') + attr_state = 43; + else if(ctx.CH(off) == '\'') + attr_state = 42; + else if(!ctx.ISANYOF(off, "\"'=<>`") && !ctx.ISNEWLINE(off)) + attr_state = 41; + else + return FALSE; + off++; + } else { + /* Anything unexpected. */ + return FALSE; + } + } + + /* We have to be on a single line. See definition of start condition + * of HTML block, type 7. */ + if(n_lines == 0) + return FALSE; + + i++; + if(i >= n_lines) + return FALSE; + + off = lines[i].beg; + line_end = lines[i].end; + + if(attr_state == 0 || attr_state == 41) + attr_state = 1; + + if(off >= max_end) + return FALSE; + } + +done: + if(off >= max_end) + return FALSE; + + *p_end = off+1; + return TRUE; +} + +static int +md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len, + const MD_LINE* lines, int n_lines, + OFF beg, OFF max_end, OFF* p_end, + OFF* p_scan_horizon) +{ + OFF off = beg; + int i = 0; + + if(off < *p_scan_horizon && *p_scan_horizon >= max_end - len) { + /* We have already scanned the range up to the max_end so we know + * there is nothing to see. */ + return FALSE; + } + + while(TRUE) { + while(off + len <= lines[i].end && off + len <= max_end) { + if(md_ascii_eq(ctx.STR(off), str, len)) { + /* Success. */ + *p_end = off + len; + return TRUE; + } + off++; + } + + i++; + if(off >= max_end || i >= n_lines) { + /* Failure. 
*/ + *p_scan_horizon = off; + return FALSE; + } + + off = lines[i].beg; + } +} + +static int +md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + + assert(ctx.CH(beg) == '<'); + + if(off + 4 >= lines[0].end) + return FALSE; + if(ctx.CH(off+1) != '!' || ctx.CH(off+2) != '-' || ctx.CH(off+3) != '-') + return FALSE; + off += 4; + + /* ">" and "." must not follow the opening. */ + if(off < lines[0].end && ctx.CH(off) == '>') + return FALSE; + if(off+1 < lines[0].end && ctx.CH(off) == '-' && ctx.CH(off+1) == '>') + return FALSE; + + /* HTML comment must not contain "--", so we scan just for "--" instead + * of "-." and verify manually that '>' follows. */ + if(md_scan_for_html_closer(ctx, "--", 2, + lines, n_lines, off, max_end, p_end, &ctx.html_comment_horizon)) + { + if(*p_end < max_end && ctx.CH(*p_end) == '>') { + *p_end = *p_end + 1; + return TRUE; + } + } + + return FALSE; +} + +static int +md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + + if(off + 2 >= lines[0].end) + return FALSE; + if(ctx.CH(off+1) != '?') + return FALSE; + off += 2; + + return md_scan_for_html_closer(ctx, "?>", 2, + lines, n_lines, off, max_end, p_end, &ctx.html_proc_instr_horizon); +} + +static int +md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + + if(off + 2 >= lines[0].end) + return FALSE; + if(ctx.CH(off+1) != '!') + return FALSE; + off += 2; + + /* Declaration name. 
*/ + if(off >= lines[0].end || !ctx.ISALPHA(off)) + return FALSE; + off++; + while(off < lines[0].end && ctx.ISALPHA(off)) + off++; + if(off < lines[0].end && !ctx.ISWHITESPACE(off)) + return FALSE; + + return md_scan_for_html_closer(ctx, ">", 1, + lines, n_lines, off, max_end, p_end, &ctx.html_decl_horizon); +} + +static int +md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + string open_str = "= lines[0].end) + return FALSE; + if(memcmp(ctx.STR(off), open_str.ptr, open_str.length) != 0) + return FALSE; + off += open_str.length; + + if(lines[n_lines-1].end < max_end) + max_end = lines[n_lines-1].end - 2; + + return md_scan_for_html_closer(ctx, "]]>", 3, + lines, n_lines, off, max_end, p_end, &ctx.html_cdata_horizon); +} + +static int +md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + assert(ctx.CH(beg) == '<'); + return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end) || + md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end) || + md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end) || + md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end) || + md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end)); +} + + +/**************************** + *** Recognizing Entity *** + ****************************/ + +static int +md_is_hex_entity_contents(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + + while(off < max_end && ISXDIGIT_(text[off]) && off - beg <= 8) + off++; + + if(1 <= off - beg && off - beg <= 6) { + *p_end = off; + return TRUE; + } else { + return FALSE; + } +} + +static int +md_is_dec_entity_contents(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + + while(off < max_end && ISDIGIT_(text[off]) && off - beg <= 8) + off++; + + if(1 <= off - beg && off - beg <= 7) { + *p_end = off; + return TRUE; + } else { + return FALSE; + } +} 

/* Check that text[beg...] starts with a named entity name: a letter followed
 * by alphanumeric characters, 2 to 48 characters in total. On success stores
 * the offset just past the name into '*p_end'. */
static int
md_is_named_entity_contents(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end)
{
    OFF off = beg;

    if(off < max_end  &&  ISALPHA_(text[off]))
        off++;
    else
        return FALSE;

    while(off < max_end  &&  ISALNUM_(text[off])  &&  off - beg <= 48)
        off++;

    if(2 <= off - beg  &&  off - beg <= 48) {
        *p_end = off;
        return TRUE;
    } else {
        return FALSE;
    }
}

/* Recognize an entity in 'text' at offset 'beg', which has to point to '&'.
 * Hexadecimal ("&#x...;"), decimal ("&#...;") and named ("&name;") forms
 * are accepted. On success returns TRUE and stores the offset just past the
 * terminating ';' into '*p_end'. */
static int
md_is_entity_str(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end)
{
    int is_contents;
    OFF off = beg;

    assert(text[off] == '&');
    off++;

    if(off+2 < max_end  &&  text[off] == '#'  &&  (text[off+1] == 'x' || text[off+1] == 'X'))
        is_contents = md_is_hex_entity_contents(ctx, text, off+2, max_end, &off);
    else if(off+1 < max_end  &&  text[off] == '#')
        is_contents = md_is_dec_entity_contents(ctx, text, off+1, max_end, &off);
    else
        is_contents = md_is_named_entity_contents(ctx, text, off, max_end, &off);

    if(is_contents  &&  off < max_end  &&  text[off] == ';') {
        *p_end = off+1;
        return TRUE;
    } else {
        return FALSE;
    }
}

/* Same as md_is_entity_str(), applied to the document text of 'ctx'. */
static int
md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
{
    return md_is_entity_str(ctx, ctx.text, beg, max_end, p_end);
}


/******************************
 ***  Attribute Management  ***
 ******************************/

/* Working storage behind one MD_ATTRIBUTE.
 *
 * substr_alloc > 0 means that 'text', 'substr_types' and 'substr_offsets'
 * are heap blocks owned by this struct (see md_free_attribute()). With
 * substr_alloc == 0 the attribute is "trivial": 'text' is borrowed from the
 * caller and the substring arrays are the embedded trivial_* members.
 */
struct MD_ATTRIBUTE_BUILD
{
    CHAR* text = null;
    MD_TEXTTYPE* substr_types = null;
    OFF* substr_offsets = null;
    int substr_count = 0;
    int substr_alloc = 0;
    MD_TEXTTYPE[1] trivial_types = [0];
    OFF[2] trivial_offsets = [0, 0];
}


/* Flag for md_build_attribute(): do not process backslash escapes. */
enum MD_BUILD_ATTR_NO_ESCAPES = 0x0001;

/* realloc() wrapper with a defined result for newSize == 0: the block is
 * freed and null is returned. */
void* realloc_safe(void* ptr, size_t newSize)
{
    import core.stdc.stdlib : free, realloc;

    if (newSize == 0)
    {
        free(ptr);
        return null;
    }

    return realloc(ptr, newSize);
}


/* Append one substring record (its type and the offset where it starts in
 * build.text) to 'build', growing the arrays when they are full.
 *
 * Returns 0 on success, -1 if the arrays cannot be grown. After a failure
 * the record has not been appended and 'build' must only be handed to
 * md_free_attribute(): substr_alloc is already raised at that point, which
 * is what makes md_free_attribute() release build.text as well.
 */
int md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
                                MD_TEXTTYPE type, OFF off)
{
    if(build.substr_count >= build.substr_alloc) {
        MD_TEXTTYPE* new_substr_types;
        OFF* new_substr_offsets;

        build.substr_alloc = (build.substr_alloc == 0 ? 8 : build.substr_alloc * 2);

        new_substr_types = cast(MD_TEXTTYPE*) realloc_safe(build.substr_types,
                                    build.substr_alloc * MD_TEXTTYPE.sizeof);
        if(new_substr_types == null) {
            ctx.MD_LOG("realloc() failed.");
            return -1;
        }
        /* The old block may have been moved or released by realloc(), so
         * adopt the new pointer right away. This keeps 'build' consistent
         * even if the second realloc() below fails; the block is then
         * released exactly once by md_free_attribute(). */
        build.substr_types = new_substr_types;

        /* Note +1 to reserve space for final offset (== raw_size). */
        new_substr_offsets = cast(OFF*) realloc_safe(build.substr_offsets,
                                    (build.substr_alloc+1) * OFF.sizeof);
        if(new_substr_offsets == null) {
            ctx.MD_LOG("realloc() failed.");
            return -1;
        }
        build.substr_offsets = new_substr_offsets;
    }

    build.substr_types[build.substr_count] = type;
    build.substr_offsets[build.substr_count] = off;
    build.substr_count++;
    return 0;
}

/* Release the heap blocks owned by 'build'. A trivial build
 * (substr_alloc == 0) owns nothing and is left untouched. */
void md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build)
{
    if(build.substr_alloc > 0) {
        free(build.text);
        free(build.substr_types);
        free(build.substr_offsets);
    }
}

/* Build the attribute 'attr' from the raw text (raw_text, raw_size).
 *
 * The result is split into substrings of plain text, entities and NUL
 * characters; backslash escapes are resolved unless 'flags' contains
 * MD_BUILD_ATTR_NO_ESCAPES. 'attr' points into 'build' afterwards.
 *
 * Returns 0 on success. On failure returns -1 after md_free_attribute() has
 * been called on 'build'.
 */
int md_build_attribute(MD_CTX* ctx, const(CHAR)* raw_text, SZ raw_size,
                       uint flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
{
    OFF raw_off, off;
    int is_trivial;
    int ret = 0;

    memset(build, 0, MD_ATTRIBUTE_BUILD.sizeof);

    /* If there is no backslash, no ampersand and no NUL character, build
     * trivial attribute without any malloc(). */
    is_trivial = TRUE;
    for(raw_off = 0; raw_off < raw_size; raw_off++) {
        if(ISANYOF3_(raw_text[raw_off], '\\', '&', '\0')) {
            is_trivial = FALSE;
            break;
        }
    }

    if(is_trivial) {
        build.text = cast(CHAR*) (raw_size ? raw_text : null);
        build.substr_types = build.trivial_types.ptr;
        build.substr_offsets = build.trivial_offsets.ptr;
        build.substr_count = 1;
        build.substr_alloc = 0;
        build.trivial_types[0] = MD_TEXT_NORMAL;
        build.trivial_offsets[0] = 0;
        build.trivial_offsets[1] = raw_size;
        off = raw_size;
    } else {
        build.text = cast(CHAR*) malloc(raw_size * CHAR.sizeof);
        if(build.text == null) {
            ctx.MD_LOG("malloc() failed.");
            goto abort;
        }

        raw_off = 0;
        off = 0;

        while(raw_off < raw_size) {
            if(raw_text[raw_off] == '\0') {
                ret = (md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off));
                if (ret < 0) goto abort;
                memcpy(build.text + off, raw_text + raw_off, 1);
                off++;
                raw_off++;
                continue;
            }

            if(raw_text[raw_off] == '&') {
                OFF ent_end;

                if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) {
                    ret = (md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off));
                    if (ret < 0) goto abort;
                    memcpy(build.text + off, raw_text + raw_off, ent_end - raw_off);
                    off += ent_end - raw_off;
                    raw_off = ent_end;
                    continue;
                }
            }

            /* Start a new normal substring unless we are already in one. */
            if(build.substr_count == 0  ||  build.substr_types[build.substr_count-1] != MD_TEXT_NORMAL)
            {
                ret = (md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off));
                if (ret < 0) goto abort;
            }

            /* Drop the backslash of an escape; the escaped character itself
             * is copied below. */
            if(!(flags & MD_BUILD_ATTR_NO_ESCAPES)  &&
               raw_text[raw_off] == '\\'  &&  raw_off+1 < raw_size  &&
               (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1])))
                raw_off++;

            build.text[off++] = raw_text[raw_off++];
        }
        build.substr_offsets[build.substr_count] = off;
    }

    attr.text = build.text;
    attr.size = off;
    attr.substr_offsets = build.substr_offsets;
    attr.substr_types = build.substr_types;
    return 0;

abort:
    md_free_attribute(ctx, build);
    return -1;
}


/*********************************************
 ***  Dictionary of Reference Definitions  ***
 *********************************************/

enum MD_FNV1A_BASE = 2166136261;
enum MD_FNV1A_PRIME =
16777619; + +uint md_fnv1a(uint base, const(void)* data, size_t n) +{ + const(ubyte)* buf = cast(const(ubyte)*) data; + uint hash = base; + size_t i; + + for(i = 0; i < n; i++) { + hash ^= buf[i]; + hash *= MD_FNV1A_PRIME; + } + + return hash; +} + + +struct MD_REF_DEF +{ + const(CHAR)* label; + const(CHAR)* title; + uint hash; + SZ label_size; + bool label_needs_free; + bool title_needs_free; + SZ title_size; + OFF dest_beg; + OFF dest_end; +}; + +/* Label equivalence is quite complicated with regards to whitespace and case + * folding. This complicates computing a hash of it as well as direct comparison + * of two labels. */ + +uint md_link_label_hash(const(CHAR)* label, SZ size) +{ + uint hash = MD_FNV1A_BASE; + OFF off; + uint codepoint; + int is_whitespace = FALSE; + + off = md_skip_unicode_whitespace(label, 0, size); + while(off < size) { + SZ char_size; + + codepoint = md_decode_unicode(label, off, size, &char_size); + is_whitespace = ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off]); + + if(is_whitespace) { + codepoint = ' '; + hash = md_fnv1a(hash, &codepoint, uint.sizeof); + off = md_skip_unicode_whitespace(label, off, size); + } else { + MD_UNICODE_FOLD_INFO fold_info; + + md_get_unicode_fold_info(codepoint, &fold_info); + hash = md_fnv1a(hash, fold_info.codepoints.ptr, fold_info.n_codepoints * uint.sizeof); + off += char_size; + } + } + + return hash; +} + +OFF md_link_label_cmp_load_fold_info(const(CHAR)* label, OFF off, SZ size, + MD_UNICODE_FOLD_INFO* fold_info) +{ + uint codepoint; + SZ char_size; + + if(off >= size) { + /* Treat end of link label as a whitespace. */ + goto whitespace; + } + + if(ISNEWLINE_(label[off])) { + /* Treat new lines as a whitespace. */ + off++; + goto whitespace; + } + + codepoint = md_decode_unicode(label, off, size, &char_size); + off += char_size; + if(ISUNICODEWHITESPACE_(codepoint)) { + /* Treat all whitespace as equivalent */ + goto whitespace; + } + + /* Get real folding info. 
*/ + md_get_unicode_fold_info(codepoint, fold_info); + return off; + +whitespace: + fold_info.codepoints[0] = ' '; + fold_info.n_codepoints = 1; + return off; +} + +static int +md_link_label_cmp(const(CHAR)* a_label, SZ a_size, const(CHAR)* b_label, SZ b_size) +{ + OFF a_off; + OFF b_off; + int a_reached_end = FALSE; + int b_reached_end = FALSE; + MD_UNICODE_FOLD_INFO a_fi = { 0 }; + MD_UNICODE_FOLD_INFO b_fi = { 0 }; + OFF a_fi_off = 0; + OFF b_fi_off = 0; + int cmp; + + a_off = md_skip_unicode_whitespace(a_label, 0, a_size); + b_off = md_skip_unicode_whitespace(b_label, 0, b_size); + while(!a_reached_end && !b_reached_end) { + /* If needed, load fold info for next char. */ + if(a_fi_off >= a_fi.n_codepoints) { + a_fi_off = 0; + a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi); + a_reached_end = (a_off >= a_size); + } + if(b_fi_off >= b_fi.n_codepoints) { + b_fi_off = 0; + b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi); + b_reached_end = (b_off >= b_size); + } + + cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off]; + if(cmp != 0) + return cmp; + + a_fi_off++; + b_fi_off++; + } + + return 0; +} + +struct MD_REF_DEF_LIST +{ +nothrow: +@nogc: + + int n_ref_defs; + int alloc_ref_defs; + + /* Valid items always point into ctx.ref_defs[] */ + MD_REF_DEF* ref_defs_space; // Starting here, a list of pointer at the end of the struct + + // To allocate a MD_REF_DEF_LIST + static size_t SIZEOF(int numDefRefs) + { + return 8 + (MD_REF_DEF*).sizeof * numDefRefs; + } + + // Returns: a slice of ref defs embedded at the end of the struct + static MD_REF_DEF*[] refDefs(MD_REF_DEF_LIST* list) + { + return (&(list.ref_defs_space))[0..list.n_ref_defs]; + } + + ref MD_REF_DEF* ref_defs_nth(size_t index) + { + MD_REF_DEF** base = &ref_defs_space; + return base[index]; + } +} + +extern(C) int md_ref_def_cmp(scope const(void)* a, scope const void* b) +{ + const(MD_REF_DEF)* a_ref = *cast(const(MD_REF_DEF*)*)a; + const(MD_REF_DEF)* 
b_ref = *cast(const(MD_REF_DEF*)*)b; + + if(a_ref.hash < b_ref.hash) + return -1; + else if(a_ref.hash > b_ref.hash) + return +1; + else + return md_link_label_cmp(a_ref.label, a_ref.label_size, b_ref.label, b_ref.label_size); +} + +extern(C) int md_ref_def_cmp_stable(scope const(void)* a, scope const(void)* b) +{ + int cmp; + + cmp = md_ref_def_cmp(a, b); + + /* Ensure stability of the sorting. */ + if(cmp == 0) { + const(MD_REF_DEF)* a_ref = *cast(const(MD_REF_DEF*)*)a; + const(MD_REF_DEF)* b_ref = *cast(const(MD_REF_DEF*)*)b; + + if(a_ref < b_ref) + cmp = -1; + else if(a_ref > b_ref) + cmp = +1; + else + cmp = 0; + } + + return cmp; +} + +int md_build_ref_def_hashtable(MD_CTX* ctx) +{ + int i, j; + + if(ctx.n_ref_defs == 0) + return 0; + + ctx.ref_def_hashtable_size = (ctx.n_ref_defs * 5) / 4; + ctx.ref_def_hashtable = cast(void**) malloc(ctx.ref_def_hashtable_size * (void*).sizeof); + if(ctx.ref_def_hashtable == null) { + ctx.MD_LOG("malloc() failed."); + goto abort; + } + memset(ctx.ref_def_hashtable, 0, ctx.ref_def_hashtable_size * (void*).sizeof); + + /* Each member of ctx.ref_def_hashtable[] can be: + * -- null, + * -- pointer to the MD_REF_DEF in ctx.ref_defs[], or + * -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to + * such MD_REF_DEFs. + */ + for(i = 0; i < ctx.n_ref_defs; i++) { + MD_REF_DEF* def = &ctx.ref_defs[i]; + void* bucket; + MD_REF_DEF_LIST* list; + + def.hash = md_link_label_hash(def.label, def.label_size); + bucket = ctx.ref_def_hashtable[def.hash % ctx.ref_def_hashtable_size]; + + if(bucket == null) { + ctx.ref_def_hashtable[def.hash % ctx.ref_def_hashtable_size] = def; + continue; + } + + if(ctx.ref_defs <= cast(MD_REF_DEF*) bucket && cast(MD_REF_DEF*) bucket < ctx.ref_defs + ctx.n_ref_defs) { + /* The bucket already contains one ref. def. Lets see whether it + * is the same label (ref. def. duplicate) or different one + * (hash conflict). 
*/ + MD_REF_DEF* old_def = cast(MD_REF_DEF*) bucket; + + if(md_link_label_cmp(def.label, def.label_size, old_def.label, old_def.label_size) == 0) { + /* Ignore this ref. def. */ + continue; + } + + /* Make the bucket capable of holding more ref. defs. */ + list = cast(MD_REF_DEF_LIST*) malloc(MD_REF_DEF_LIST.SIZEOF(4)); + if(list == null) { + ctx.MD_LOG("malloc() failed."); + goto abort; + } + list.ref_defs_nth(0) = old_def; + list.ref_defs_nth(1) = def; + list.n_ref_defs = 2; + list.alloc_ref_defs = 4; + ctx.ref_def_hashtable[def.hash % ctx.ref_def_hashtable_size] = list; + continue; + } + + /* Append the def to the bucket list. */ + list = cast(MD_REF_DEF_LIST*) bucket; + if(list.n_ref_defs >= list.alloc_ref_defs) { + MD_REF_DEF_LIST* list_tmp = cast(MD_REF_DEF_LIST*) realloc_safe(list, MD_REF_DEF_LIST.SIZEOF( 2 * list.alloc_ref_defs )); + if(list_tmp == null) { + ctx.MD_LOG("realloc() failed."); + goto abort; + } + list = list_tmp; + list.alloc_ref_defs *= 2; + ctx.ref_def_hashtable[def.hash % ctx.ref_def_hashtable_size] = list; + } + + list.ref_defs_nth(list.n_ref_defs) = def; + list.n_ref_defs++; + } + + /* Sort the complex buckets so we can use bsearch() with them. */ + for(i = 0; i < ctx.ref_def_hashtable_size; i++) { + void* bucket = ctx.ref_def_hashtable[i]; + MD_REF_DEF_LIST* list; + + if(bucket == null) + continue; + if(ctx.ref_defs <= cast(MD_REF_DEF*) bucket && cast(MD_REF_DEF*) bucket < ctx.ref_defs + ctx.n_ref_defs) + continue; + + list = cast(MD_REF_DEF_LIST*) bucket; + qsort(MD_REF_DEF_LIST.refDefs(list).ptr, list.n_ref_defs, (MD_REF_DEF*).sizeof, &md_ref_def_cmp_stable); + + /* Disable duplicates. 
*/ + for(j = 1; j < list.n_ref_defs; j++) { + if(md_ref_def_cmp(&list.ref_defs_nth(j-1), &list.ref_defs_nth(j)) == 0) + list.ref_defs_nth(j) = list.ref_defs_nth(j-1); + } + } + + return 0; + +abort: + return -1; +} + +static void +md_free_ref_def_hashtable(MD_CTX* ctx) +{ + if(ctx.ref_def_hashtable != null) { + int i; + + for(i = 0; i < ctx.ref_def_hashtable_size; i++) { + void* bucket = ctx.ref_def_hashtable[i]; + if(bucket == null) + continue; + if(ctx.ref_defs <= cast(MD_REF_DEF*) bucket && cast(MD_REF_DEF*) bucket < ctx.ref_defs + ctx.n_ref_defs) + continue; + free(bucket); + } + + free(ctx.ref_def_hashtable); + } +} + +const(MD_REF_DEF)* md_lookup_ref_def(MD_CTX* ctx, const(CHAR)* label, SZ label_size) +{ + uint hash; + void* bucket; + + if(ctx.ref_def_hashtable_size == 0) + return null; + + hash = md_link_label_hash(label, label_size); + bucket = ctx.ref_def_hashtable[hash % ctx.ref_def_hashtable_size]; + + if(bucket == null) { + return null; + } else if(ctx.ref_defs <= cast(MD_REF_DEF*) bucket && cast(MD_REF_DEF*) bucket < ctx.ref_defs + ctx.n_ref_defs) { + const MD_REF_DEF* def = cast(MD_REF_DEF*) bucket; + + if(md_link_label_cmp(def.label, def.label_size, label, label_size) == 0) + return def; + else + return null; + } else { + MD_REF_DEF_LIST* list = cast(MD_REF_DEF_LIST*) bucket; + MD_REF_DEF key_buf; + const MD_REF_DEF* key = &key_buf; + const(MD_REF_DEF*)* ret; + + key_buf.label = cast(CHAR*) label; + key_buf.label_size = label_size; + key_buf.hash = md_link_label_hash(key_buf.label, key_buf.label_size); + + ret = cast(const(MD_REF_DEF*)*) bsearch(&key, MD_REF_DEF_LIST.refDefs(list).ptr, + list.n_ref_defs, (MD_REF_DEF*).sizeof, &md_ref_def_cmp); + if(ret != null) + return *ret; + else + return null; + } +} + + +/*************************** + *** Recognizing Links *** + ***************************/ + +/* Note this code is partially shared between processing inlines and blocks + * as reference definitions and links share some helper parser functions. 
+ */ + +struct MD_LINK_ATTR +{ + OFF dest_beg; + OFF dest_end; + + const(CHAR)* title; + SZ title_size; + bool title_needs_free; +} + + +static int +md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, + OFF* p_end, int* p_beg_line_index, int* p_end_line_index, + OFF* p_contents_beg, OFF* p_contents_end) +{ + OFF off = beg; + OFF contents_beg = 0; + OFF contents_end = 0; + int line_index = 0; + int len = 0; + + if(ctx.CH(off) != '[') + return FALSE; + off++; + + while(1) { + OFF line_end = lines[line_index].end; + + while(off < line_end) { + if(ctx.CH(off) == '\\' && off+1 < ctx.size && (ctx.ISPUNCT(off+1) || ctx.ISNEWLINE(off+1))) { + if(contents_end == 0) { + contents_beg = off; + *p_beg_line_index = line_index; + } + contents_end = off + 2; + off += 2; + } else if(ctx.CH(off) == '[') { + return FALSE; + } else if(ctx.CH(off) == ']') { + if(contents_beg < contents_end) { + /* Success. */ + *p_contents_beg = contents_beg; + *p_contents_end = contents_end; + *p_end = off+1; + *p_end_line_index = line_index; + return TRUE; + } else { + /* Link label must have some non-whitespace contents. 
*/ + return FALSE; + } + } else { + uint codepoint; + SZ char_size; + + codepoint = md_decode_unicode(ctx.text, off, ctx.size, &char_size); + if(!ISUNICODEWHITESPACE_(codepoint)) { + if(contents_end == 0) { + contents_beg = off; + *p_beg_line_index = line_index; + } + contents_end = off + char_size; + } + + off += char_size; + } + + len++; + if(len > 999) + return FALSE; + } + + line_index++; + len++; + if(line_index < n_lines) + off = lines[line_index].beg; + else + break; + } + + return FALSE; +} + +static int +md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, + OFF* p_contents_beg, OFF* p_contents_end) +{ + OFF off = beg; + + if(off >= max_end || ctx.CH(off) != '<') + return FALSE; + off++; + + while(off < max_end) { + if(ctx.CH(off) == '\\' && off+1 < max_end && ctx.ISPUNCT(off+1)) { + off += 2; + continue; + } + + if(ctx.ISNEWLINE(off) || ctx.CH(off) == '<') + return FALSE; + + if(ctx.CH(off) == '>') { + /* Success. */ + *p_contents_beg = beg+1; + *p_contents_end = off; + *p_end = off+1; + return TRUE; + } + + off++; + } + + return FALSE; +} + +static int +md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, + OFF* p_contents_beg, OFF* p_contents_end) +{ + OFF off = beg; + int parenthesis_level = 0; + + while(off < max_end) { + if(ctx.CH(off) == '\\' && off+1 < max_end && ctx.ISPUNCT(off+1)) { + off += 2; + continue; + } + + if(ctx.ISWHITESPACE(off) || ctx.ISCNTRL(off)) + break; + + /* Link destination may include balanced pairs of unescaped '(' ')'. + * Note we limit the maximal nesting level by 32 to protect us from + * https://github.com/jgm/cmark/issues/214 */ + if(ctx.CH(off) == '(') { + parenthesis_level++; + if(parenthesis_level > 32) + return FALSE; + } else if(ctx.CH(off) == ')') { + if(parenthesis_level == 0) + break; + parenthesis_level--; + } + + off++; + } + + if(parenthesis_level != 0 || off == beg) + return FALSE; + + /* Success. 
*/ + *p_contents_beg = beg; + *p_contents_end = off; + *p_end = off; + return TRUE; +} + +static int +md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, + OFF* p_contents_beg, OFF* p_contents_end) +{ + if(ctx.CH(beg) == '<') + return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end); + else + return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end); +} + +static int +md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, + OFF* p_end, int* p_beg_line_index, int* p_end_line_index, + OFF* p_contents_beg, OFF* p_contents_end) +{ + OFF off = beg; + CHAR closer_char; + int line_index = 0; + + /* White space with up to one line break. */ + while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) + off++; + if(off >= lines[line_index].end) { + line_index++; + if(line_index >= n_lines) + return FALSE; + off = lines[line_index].beg; + } + if(off == beg) + return FALSE; + + *p_beg_line_index = line_index; + + /* First char determines how to detect end of it. */ + switch(ctx.CH(off)) { + case '"': closer_char = '"'; break; + case '\'': closer_char = '\''; break; + case '(': closer_char = ')'; break; + default: return FALSE; + } + off++; + + *p_contents_beg = off; + + while(line_index < n_lines) { + OFF line_end = lines[line_index].end; + + while(off < line_end) { + if(ctx.CH(off) == '\\' && off+1 < ctx.size && (ctx.ISPUNCT(off+1) || ctx.ISNEWLINE(off+1))) { + off++; + } else if(ctx.CH(off) == closer_char) { + /* Success. */ + *p_contents_end = off; + *p_end = off+1; + *p_end_line_index = line_index; + return TRUE; + } else if(closer_char == ')' && ctx.CH(off) == '(') { + /* ()-style title cannot contain (unescaped '(')) */ + return FALSE; + } + + off++; + } + + line_index++; + } + + return FALSE; +} + +/* Returns 0 if it is not a reference definition. + * + * Returns N > 0 if it is a reference definition. N then corresponds to the + * number of lines forming it). 
In this case the definition is stored for + * resolving any links referring to it. + * + * Returns -1 in case of an error (out of memory). + */ +int md_is_link_reference_definition(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines) +{ + OFF label_contents_beg; + OFF label_contents_end; + int label_contents_line_index = -1; + int label_is_multiline; + const(CHAR)* label; + SZ label_size; + bool label_needs_free = false; + OFF dest_contents_beg; + OFF dest_contents_end; + OFF title_contents_beg; + OFF title_contents_end; + int title_contents_line_index; + int title_is_multiline; + OFF off; + int line_index = 0; + int tmp_line_index; + MD_REF_DEF* def; + int ret; + + /* Link label. */ + if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg, + &off, &label_contents_line_index, &line_index, + &label_contents_beg, &label_contents_end)) + return FALSE; + label_is_multiline = (label_contents_line_index != line_index); + + /* Colon. */ + if(off >= lines[line_index].end || ctx.CH(off) != ':') + return FALSE; + off++; + + /* Optional white space with up to one line break. */ + while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) + off++; + if(off >= lines[line_index].end) { + line_index++; + if(line_index >= n_lines) + return FALSE; + off = lines[line_index].beg; + } + + /* Link destination. */ + if(!md_is_link_destination(ctx, off, lines[line_index].end, + &off, &dest_contents_beg, &dest_contents_end)) + return FALSE; + + /* (Optional) title. Note we interpret it as an title only if nothing + * more follows on its last line. */ + if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off, + &off, &title_contents_line_index, &tmp_line_index, + &title_contents_beg, &title_contents_end) + && off >= lines[line_index + tmp_line_index].end) + { + title_is_multiline = (tmp_line_index != title_contents_line_index); + title_contents_line_index += line_index; + line_index += tmp_line_index; + } else { + /* Not a title. 
*/ + title_is_multiline = FALSE; + title_contents_beg = off; + title_contents_end = off; + title_contents_line_index = 0; + } + + /* Nothing more can follow on the last line. */ + if(off < lines[line_index].end) + return FALSE; + + /* Construct label. */ + if(!label_is_multiline) { + label = cast(CHAR*) ctx.STR(label_contents_beg); + label_size = label_contents_end - label_contents_beg; + label_needs_free = false; + } else { + ret = (md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end, + lines + label_contents_line_index, n_lines - label_contents_line_index, + ' ', &label, &label_size)); + if (ret < 0) goto abort; + label_needs_free = true; + } + + /* Store the reference definition. */ + if(ctx.n_ref_defs >= ctx.alloc_ref_defs) { + MD_REF_DEF* new_defs; + + ctx.alloc_ref_defs = (ctx.alloc_ref_defs > 0 ? ctx.alloc_ref_defs * 2 : 16); + new_defs = cast(MD_REF_DEF*) realloc_safe(ctx.ref_defs, ctx.alloc_ref_defs * MD_REF_DEF.sizeof); + if(new_defs == null) { + ctx.MD_LOG("realloc() failed."); + ret = -1; + goto abort; + } + + ctx.ref_defs = new_defs; + } + + def = &ctx.ref_defs[ctx.n_ref_defs]; + memset(def, 0, MD_REF_DEF.sizeof); + + def.label = label; + def.label_size = label_size; + def.label_needs_free = label_needs_free; + + def.dest_beg = dest_contents_beg; + def.dest_end = dest_contents_end; + + if(title_contents_beg >= title_contents_end) { + def.title = null; + def.title_size = 0; + } else if(!title_is_multiline) { + def.title = cast(CHAR*) ctx.STR(title_contents_beg); + def.title_size = title_contents_end - title_contents_beg; + } else { + ret = (md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end, + lines + title_contents_line_index, n_lines - title_contents_line_index, + '\n', &def.title, &def.title_size)); + if (ret < 0) goto abort; + def.title_needs_free = true; + } + + /* Success. */ + ctx.n_ref_defs++; + return line_index + 1; + +abort: + /* Failure. 
*/ + if(label_needs_free) + free(cast(void*)label); // Note: const_cast here + return -1; +} + +static int +md_is_link_reference(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, + OFF beg, OFF end, MD_LINK_ATTR* attr) +{ + const(MD_REF_DEF)* def; + const(MD_LINE)* beg_line; + const(MD_LINE)* end_line; + const(CHAR)* label; + SZ label_size; + int ret; + + assert(ctx.CH(beg) == '[' || ctx.CH(beg) == '!'); + assert(ctx.CH(end-1) == ']'); + + beg += (ctx.CH(beg) == '!' ? 2 : 1); + end--; + + /* Find lines corresponding to the beg and end positions. */ + assert(lines[0].beg <= beg); + beg_line = lines; + while(beg >= beg_line.end) + beg_line++; + + assert(end <= lines[n_lines-1].end); + end_line = beg_line; + while(end >= end_line.end) + end_line++; + + if(beg_line != end_line) { + ret = (md_merge_lines_alloc(ctx, beg, end, beg_line, + cast(int)(n_lines - (beg_line - lines)), ' ', &label, &label_size)); + if (ret < 0) goto abort; + } else { + label = cast(CHAR*) ctx.STR(beg); + label_size = end - beg; + } + + def = md_lookup_ref_def(ctx, label, label_size); + if(def != null) { + attr.dest_beg = def.dest_beg; + attr.dest_end = def.dest_end; + attr.title = def.title; + attr.title_size = def.title_size; + attr.title_needs_free = false; + } + + if(beg_line != end_line) + free(cast(void*)label); // Note: const_cast here + + ret = (def != null); + +abort: + return ret; +} + +static int +md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines, + OFF beg, OFF* p_end, MD_LINK_ATTR* attr) +{ + int line_index = 0; + int tmp_line_index; + OFF title_contents_beg; + OFF title_contents_end; + int title_contents_line_index; + int title_is_multiline; + OFF off = beg; + int ret = FALSE; + + while(off >= lines[line_index].end) + line_index++; + + assert(ctx.CH(off) == '('); + off++; + + /* Optional white space with up to one line break. 
*/ + while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) + off++; + if(off >= lines[line_index].end && ctx.ISNEWLINE(off)) { + line_index++; + if(line_index >= n_lines) + return FALSE; + off = lines[line_index].beg; + } + + /* Link destination may be omitted, but only when not also having a title. */ + if(off < ctx.size && ctx.CH(off) == ')') { + attr.dest_beg = off; + attr.dest_end = off; + attr.title = null; + attr.title_size = 0; + attr.title_needs_free = false; + off++; + *p_end = off; + return TRUE; + } + + /* Link destination. */ + if(!md_is_link_destination(ctx, off, lines[line_index].end, + &off, &attr.dest_beg, &attr.dest_end)) + return FALSE; + + /* (Optional) title. */ + if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off, + &off, &title_contents_line_index, &tmp_line_index, + &title_contents_beg, &title_contents_end)) + { + title_is_multiline = (tmp_line_index != title_contents_line_index); + title_contents_line_index += line_index; + line_index += tmp_line_index; + } else { + /* Not a title. */ + title_is_multiline = FALSE; + title_contents_beg = off; + title_contents_end = off; + title_contents_line_index = 0; + } + + /* Optional whitespace followed with final ')'. */ + while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) + off++; + if(off >= lines[line_index].end && ctx.ISNEWLINE(off)) { + line_index++; + if(line_index >= n_lines) + return FALSE; + off = lines[line_index].beg; + } + if(ctx.CH(off) != ')') + goto abort; + off++; + + if(title_contents_beg >= title_contents_end) { + attr.title = null; + attr.title_size = 0; + attr.title_needs_free = false; + } else if(!title_is_multiline) { + attr.title = cast(CHAR*) ctx.STR(title_contents_beg); // Note: const_cast here! 
+ attr.title_size = title_contents_end - title_contents_beg; + attr.title_needs_free = false; + } else { + ret = (md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end, + lines + title_contents_line_index, n_lines - title_contents_line_index, + '\n', &attr.title, &attr.title_size)); + if (ret < 0) goto abort; + attr.title_needs_free = true; + } + + *p_end = off; + ret = TRUE; + +abort: + return ret; +} + +void md_free_ref_defs(MD_CTX* ctx) +{ + int i; + + for(i = 0; i < ctx.n_ref_defs; i++) { + MD_REF_DEF* def = &ctx.ref_defs[i]; + + if(def.label_needs_free) + free(cast(void*)def.label); // Note: const_cast here + if(def.title_needs_free) + free(cast(void*)def.title); // Note: const_cast here + } + + free(ctx.ref_defs); +} + + +/****************************************** + *** Processing Inlines (a.k.a Spans) *** + ******************************************/ + +/* We process inlines in few phases: + * + * (1) We go through the block text and collect all significant characters + * which may start/end a span or some other significant position into + * ctx.marks[]. Core of this is what md_collect_marks() does. + * + * We also do some very brief preliminary context-less analysis, whether + * it might be opener or closer (e.g. of an emphasis span). + * + * This speeds the other steps as we do not need to re-iterate over all + * characters anymore. + * + * (2) We analyze each potential mark types, in order by their precedence. + * + * In each md_analyze_XXX() function, we re-iterate list of the marks, + * skipping already resolved regions (in preceding precedences) and try to + * resolve them. + * + * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark + * them as resolved. + * + * (2.2) For range-type marks, we analyze whether the mark could be closer + * and, if yes, whether there is some preceding opener it could satisfy. 
+ * + * If not we check whether it could be really an opener and if yes, we + * remember it so subsequent closers may resolve it. + * + * (3) Finally, when all marks were analyzed, we render the block contents + * by calling MD_RENDERER::text() callback, interrupting by ::enter_span() + * or ::close_span() whenever we reach a resolved mark. + */ + + +/* The mark structure. + * + * '\\': Maybe escape sequence. + * '\0': null char. + * '*': Maybe (strong) emphasis start/end. + * '_': Maybe (strong) emphasis start/end. + * '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH). + * '`': Maybe code span start/end. + * '&': Maybe start of entity. + * ';': Maybe end of entity. + * '<': Maybe start of raw HTML or autolink. + * '>': Maybe end of raw HTML or autolink. + * '[': Maybe start of link label or link text. + * '!': Equivalent of '[' for image. + * ']': Maybe end of link label or link text. + * '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS). + * ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS). + * '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS). + * 'D': Dummy mark, it reserves a space for splitting a previous mark + * (e.g. emphasis) or to make more space for storing some special data + * related to the preceding mark (e.g. link). + * + * Note that not all instances of these chars in the text imply creation of the + * structure. Only those which have (or may have, after we see more context) + * the special meaning. + * + * (Keep this struct as small as possible to fit as much of them into CPU + * cache line.) + */ + +struct MD_MARK { + OFF beg; + OFF end; + + /* For unresolved openers, 'prev' and 'next' form the chain of open openers + * of given type 'ch'. + * + * During resolving, we disconnect from the chain and point to the + * corresponding counterpart so opener points to its closer and vice versa. 
+ */ + int prev; + int next; + CHAR ch; + ubyte flags; +}; + +/* Mark flags (these apply to ALL mark types). */ +enum MD_MARK_POTENTIAL_OPENER = 0x01; /* Maybe opener. */ +enum MD_MARK_POTENTIAL_CLOSER = 0x02; /* Maybe closer. */ +enum MD_MARK_OPENER = 0x04; /* Definitely opener. */ +enum MD_MARK_CLOSER = 0x08; /* Definitely closer. */ +enum MD_MARK_RESOLVED = 0x10; /* Resolved in any definite way. */ + +/* Mark flags specific for various mark types (so they can share bits). */ +enum MD_MARK_EMPH_INTRAWORD = 0x20; /* Helper for the "rule of 3". */ +enum MD_MARK_EMPH_MOD3_0 = 0x40; +enum MD_MARK_EMPH_MOD3_1 = 0x80; +enum MD_MARK_EMPH_MOD3_2 = (0x40 | 0x80); +enum MD_MARK_EMPH_MOD3_MASK = (0x40 | 0x80); +enum MD_MARK_AUTOLINK = 0x20; /* Distinguisher for '<', '>'. */ +enum MD_MARK_VALIDPERMISSIVEAUTOLINK = 0x20; /* For permissive autolinks. */ + +MD_MARKCHAIN* md_asterisk_chain(MD_CTX* ctx, uint flags) +{ + switch(flags & (MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_MASK)) + { + case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_0: return ctx.ASTERISK_OPENERS_intraword_mod3_0; + case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_1: return ctx.ASTERISK_OPENERS_intraword_mod3_1; + case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_2: return ctx.ASTERISK_OPENERS_intraword_mod3_2; + case MD_MARK_EMPH_MOD3_0: return ctx.ASTERISK_OPENERS_extraword_mod3_0; + case MD_MARK_EMPH_MOD3_1: return ctx.ASTERISK_OPENERS_extraword_mod3_1; + case MD_MARK_EMPH_MOD3_2: return ctx.ASTERISK_OPENERS_extraword_mod3_2; + default: assert(false); + } +} + +MD_MARKCHAIN* md_mark_chain(MD_CTX* ctx, int mark_index) +{ + MD_MARK* mark = &ctx.marks[mark_index]; + + switch(mark.ch) { + case '*': return md_asterisk_chain(ctx, mark.flags); + case '_': return ctx.UNDERSCORE_OPENERS; + case '~': return ctx.TILDE_OPENERS; + case '[': return ctx.BRACKET_OPENERS; + case '|': return ctx.TABLECELLBOUNDARIES; + default: return null; + } +} + +MD_MARK* md_push_mark(MD_CTX* ctx) +{ + if(ctx.n_marks >= ctx.alloc_marks) { 
+ MD_MARK* new_marks; + + ctx.alloc_marks = (ctx.alloc_marks > 0 ? ctx.alloc_marks * 2 : 64); + new_marks = cast(MD_MARK*) realloc_safe(ctx.marks, ctx.alloc_marks * MD_MARK.sizeof); + if(new_marks == null) { + ctx.MD_LOG("realloc() failed."); + return null; + } + + ctx.marks = new_marks; + } + + return &ctx.marks[ctx.n_marks++]; +} + +int PUSH_MARK_(MD_CTX* ctx, MD_MARK** mark) +{ + *mark = md_push_mark(ctx); + if(*mark == null) + { + return -1; + } + return 0; +} + +int PUSH_MARK(MD_CTX* ctx, MD_MARK** mark, CHAR ch_, OFF beg_, OFF end_, int flags_) +{ + int ret = PUSH_MARK_(ctx, mark); + if (ret != 0) + return ret; + + (*mark).beg = (beg_); + (*mark).end = (end_); + (*mark).prev = -1; + (*mark).next = -1; + (*mark).ch = cast(char)(ch_); + (*mark).flags = cast(ubyte)flags_; + return 0; +} + +static void +md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index) +{ + if(chain.tail >= 0) + ctx.marks[chain.tail].next = mark_index; + else + chain.head = mark_index; + + ctx.marks[mark_index].prev = chain.tail; + chain.tail = mark_index; +} + +/* Sometimes, we need to store a pointer into the mark. It is quite rare + * so we do not bother to make MD_MARK use union, and it can only happen + * for dummy marks. */ +void md_mark_store_ptr(MD_CTX* ctx, int mark_index, const(void)* ptr) +{ + MD_MARK* mark = &ctx.marks[mark_index]; + assert(mark.ch == 'D'); + + /* Check only members beg and end are misused for this. */ + assert((void*).sizeof <= 2 * OFF.sizeof); + memcpy(mark, &ptr, (void*).sizeof); +} + +static void* +md_mark_get_ptr(MD_CTX* ctx, int mark_index) +{ + void* ptr; + MD_MARK* mark = &ctx.marks[mark_index]; + assert(mark.ch == 'D'); + memcpy(&ptr, mark, (void*).sizeof); + return ptr; +} + +static void +md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index) +{ + MD_MARK* opener = &ctx.marks[opener_index]; + MD_MARK* closer = &ctx.marks[closer_index]; + + /* Remove opener from the list of openers. 
*/ + if(chain != null) { + if(opener.prev >= 0) + ctx.marks[opener.prev].next = opener.next; + else + chain.head = opener.next; + + if(opener.next >= 0) + ctx.marks[opener.next].prev = opener.prev; + else + chain.tail = opener.prev; + } + + /* Interconnect opener and closer and mark both as resolved. */ + opener.next = closer_index; + opener.flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; + closer.prev = opener_index; + closer.flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; +} + + +enum MD_ROLLBACK_ALL = 0; +enum MD_ROLLBACK_CROSSING = 1; + +/* In the range ctx.marks[opener_index] ... [closer_index], undo some or all + * resolvings accordingly to these rules: + * + * (1) All openers BEFORE the range corresponding to any closer inside the + * range are un-resolved and they are re-added to their respective chains + * of unresolved openers. This ensures we can reuse the opener for closers + * AFTER the range. + * + * (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range + * are discarded. + * + * (3) If 'how' is MD_ROLLBACK_CROSSING, only closers with openers handled + * in (1) are discarded. I.e. pairs of openers and closers which are both + * inside the range are retained as well as any unpaired marks. + */ +static void +md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how) +{ + int i; + int mark_index; + + /* Cut all unresolved openers at the mark index. */ + for(i = OPENERS_CHAIN_FIRST; i < OPENERS_CHAIN_LAST+1; i++) { + MD_MARKCHAIN* chain = &ctx.mark_chains[i]; + + while(chain.tail >= opener_index) + chain.tail = ctx.marks[chain.tail].prev; + + if(chain.tail >= 0) + ctx.marks[chain.tail].next = -1; + else + chain.head = -1; + } + + /* Go backwards so that un-resolved openers are re-added into their + * respective chains, in the right order. 
*/ + mark_index = closer_index - 1; + while(mark_index > opener_index) { + MD_MARK* mark = &ctx.marks[mark_index]; + int mark_flags = mark.flags; + int discard_flag = (how == MD_ROLLBACK_ALL); + + if(mark.flags & MD_MARK_CLOSER) { + int mark_opener_index = mark.prev; + + /* Undo opener BEFORE the range. */ + if(mark_opener_index < opener_index) { + MD_MARK* mark_opener = &ctx.marks[mark_opener_index]; + MD_MARKCHAIN* chain; + + mark_opener.flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED); + chain = md_mark_chain(ctx, opener_index); + if(chain != null) { + md_mark_chain_append(ctx, chain, mark_opener_index); + discard_flag = 1; + } + } + } + + /* And reset our flags. */ + if(discard_flag) + mark.flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED); + + /* Jump as far as we can over unresolved or non-interesting marks. */ + switch(how) { + case MD_ROLLBACK_CROSSING: + if((mark_flags & MD_MARK_CLOSER) && mark.prev > opener_index) { + /* If we are closer with opener INSIDE the range, there may + * not be any other crosser inside the subrange. */ + mark_index = mark.prev; + break; + } + goto default; + /* Pass through. 
*/ + default: + mark_index--; + break; + } + } +} + +void md_build_mark_char_map(MD_CTX* ctx) +{ + memset(ctx.mark_char_map.ptr, 0, ctx.mark_char_map.length); + + ctx.mark_char_map['\\'] = 1; + ctx.mark_char_map['*'] = 1; + ctx.mark_char_map['_'] = 1; + ctx.mark_char_map['`'] = 1; + ctx.mark_char_map['&'] = 1; + ctx.mark_char_map[';'] = 1; + ctx.mark_char_map['<'] = 1; + ctx.mark_char_map['>'] = 1; + ctx.mark_char_map['['] = 1; + ctx.mark_char_map['!'] = 1; + ctx.mark_char_map[']'] = 1; + ctx.mark_char_map['\0'] = 1; + + if(ctx.parser.flags & MD_FLAG_STRIKETHROUGH) + ctx.mark_char_map['~'] = 1; + + if(ctx.parser.flags & MD_FLAG_LATEXMATHSPANS) + ctx.mark_char_map['$'] = 1; + + if(ctx.parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS) + ctx.mark_char_map['@'] = 1; + + if(ctx.parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS) + ctx.mark_char_map[':'] = 1; + + if(ctx.parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS) + ctx.mark_char_map['.'] = 1; + + if(ctx.parser.flags & MD_FLAG_TABLES) + ctx.mark_char_map['|'] = 1; + + if(ctx.parser.flags & MD_FLAG_COLLAPSEWHITESPACE) { + int i; + + for(i = 0; i < cast(int) (ctx.mark_char_map).sizeof; i++) { + if(ISWHITESPACE_(cast(CHAR)i)) + ctx.mark_char_map[i] = 1; + } + } +} + +/* We limit code span marks to lower then 32 backticks. This solves the + * pathologic case of too many openers, each of different length: Their + * resolving would be then O(n^2). 
*/ +enum CODESPAN_MARK_MAXLEN = 32; + +int md_is_code_span(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, OFF beg, + OFF* p_opener_beg, OFF* p_opener_end, + OFF* p_closer_beg, OFF* p_closer_end, + OFF* last_potential_closers, + int* p_reached_paragraph_end) +{ + OFF opener_beg = beg; + OFF opener_end; + OFF closer_beg; + OFF closer_end; + SZ mark_len; + OFF line_end; + int has_space_after_opener = FALSE; + int has_eol_after_opener = FALSE; + int has_space_before_closer = FALSE; + int has_eol_before_closer = FALSE; + int has_only_space = TRUE; + int line_index = 0; + + line_end = lines[0].end; + opener_end = opener_beg; + while(opener_end < line_end && ctx.CH(opener_end) == '`') + opener_end++; + has_space_after_opener = (opener_end < line_end && ctx.CH(opener_end) == ' '); + has_eol_after_opener = (opener_end == line_end); + + /* The caller needs to know end of the opening mark even if we fail. */ + *p_opener_end = opener_end; + + mark_len = opener_end - opener_beg; + if(mark_len > CODESPAN_MARK_MAXLEN) + return FALSE; + + /* Check whether we already know there is no closer of this length. + * If so, re-scan does no sense. This fixes issue #59. */ + if(last_potential_closers[mark_len-1] >= lines[n_lines-1].end || + (*p_reached_paragraph_end && last_potential_closers[mark_len-1] < opener_end)) + return FALSE; + + closer_beg = opener_end; + closer_end = opener_end; + + /* Find closer mark. */ + while(TRUE) { + while(closer_beg < line_end && ctx.CH(closer_beg) != '`') { + if(ctx.CH(closer_beg) != ' ') + has_only_space = FALSE; + closer_beg++; + } + closer_end = closer_beg; + while(closer_end < line_end && ctx.CH(closer_end) == '`') + closer_end++; + + if(closer_end - closer_beg == mark_len) { + /* Success. 
*/ + has_space_before_closer = (closer_beg > lines[line_index].beg && ctx.CH(closer_beg-1) == ' '); + has_eol_before_closer = (closer_beg == lines[line_index].beg); + break; + } + + if(closer_end - closer_beg > 0) { + /* We have found a back-tick which is not part of the closer. */ + has_only_space = FALSE; + + /* But if we eventually fail, remember it as a potential closer + * of its own length for future attempts. This mitigates needs for + * rescans. */ + if(closer_end - closer_beg < CODESPAN_MARK_MAXLEN) { + if(closer_beg > last_potential_closers[closer_end - closer_beg - 1]) + last_potential_closers[closer_end - closer_beg - 1] = closer_beg; + } + } + + if(closer_end >= line_end) { + line_index++; + if(line_index >= n_lines) { + /* Reached end of the paragraph and still nothing. */ + *p_reached_paragraph_end = TRUE; + return FALSE; + } + /* Try on the next line. */ + line_end = lines[line_index].end; + closer_beg = lines[line_index].beg; + } else { + closer_beg = closer_end; + } + } + + /* If there is a space or a new line both after and before the opener + * (and if the code span is not made of spaces only), consume one initial + * and one trailing space as part of the marks. */ + if(!has_only_space && + (has_space_after_opener || has_eol_after_opener) && + (has_space_before_closer || has_eol_before_closer)) + { + if(has_space_after_opener) + opener_end++; + else + opener_end = lines[1].beg; + + if(has_space_before_closer) + closer_beg--; + else { + closer_beg = lines[line_index-1].end; + /* We need to eat the preceding "\r\n" but not any line trailing + * spaces. */ + while(closer_beg < ctx.size && ctx.ISBLANK(closer_beg)) + closer_beg++; + } + } + + *p_opener_beg = opener_beg; + *p_opener_end = opener_end; + *p_closer_beg = closer_beg; + *p_closer_end = closer_end; + return TRUE; +} + +static int +md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg+1; + + assert(ctx.CH(beg) == '<'); + + /* Check for scheme. 
*/ + if(off >= max_end || !ctx.ISASCII(off)) + return FALSE; + off++; + while(1) { + if(off >= max_end) + return FALSE; + if(off - beg > 32) + return FALSE; + if(ctx.CH(off) == ':' && off - beg >= 3) + break; + if(!ctx.ISALNUM(off) && ctx.CH(off) != '+' && ctx.CH(off) != '-' && ctx.CH(off) != '.') + return FALSE; + off++; + } + + /* Check the path after the scheme: everything up to the closing '>' must + * be free of whitespace, control characters and '<'. */ + while(off < max_end && ctx.CH(off) != '>') { + if(ctx.ISWHITESPACE(off) || ctx.ISCNTRL(off) || ctx.CH(off) == '<') + return FALSE; + off++; + } + + if(off >= max_end) + return FALSE; + + assert(ctx.CH(off) == '>'); + *p_end = off+1; + return TRUE; +} +/* Check whether the '<' at `beg` starts an e-mail autolink, i.e. + * "<user@domain>" closed by '>' before max_end. On success stores the offset + * just past the '>' in *p_end and returns TRUE; otherwise returns FALSE. */ +static int +md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg + 1; + int label_len; + + assert(ctx.CH(beg) == '<'); + + /* The code should correspond to this regexp: + /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+ + @[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? + (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/ + */ + + /* Username (before '@'); it must not be empty. */ + while(off < max_end && (ctx.ISALNUM(off) || ctx.ISANYOF(off, ".!#$%&'*+/=?^_`{|}~-"))) + off++; + if(off <= beg+1) + return FALSE; + + /* '@' */ + if(off >= max_end || ctx.CH(off) != '@') + return FALSE; + off++; + + /* Labels delimited with '.'; each label is a sequence of 1 - 62 alnum + * characters or '-', but '-' is not allowed as first or last char. */ + label_len = 0; + while(off < max_end) { + if(ctx.ISALNUM(off)) + label_len++; + else if(ctx.CH(off) == '-' && label_len > 0) + label_len++; + else if(ctx.CH(off) == '.' 
&& label_len > 0 && ctx.CH(off-1) != '-') + label_len = 0; + else + break; + + if(label_len > 62) + return FALSE; + + off++; + } + + if(label_len <= 0 || off >= max_end || ctx.CH(off) != '>' || ctx.CH(off-1) == '-') + return FALSE; + + *p_end = off+1; + return TRUE; +} +/* Check for either kind of autolink at `beg`, trying the URI form first and + * the e-mail form second. *p_missing_mailto is set to FALSE when the URI + * form matched and to TRUE when the e-mail form matched. */ +static int +md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto) +{ + if(md_is_autolink_uri(ctx, beg, max_end, p_end)) { + *p_missing_mailto = FALSE; + return TRUE; + } + + if(md_is_autolink_email(ctx, beg, max_end, p_end)) { + *p_missing_mailto = TRUE; + return TRUE; + } + + return FALSE; +} + +/* Tell whether the character at `off` is flagged in ctx.mark_char_map. + * For 8-bit encodings, mark_char_map[] covers all 256 elements. */ +bool IS_MARK_CHAR(MD_CTX* ctx, OFF off) +{ + return (ctx.mark_char_map[cast(ubyte) ctx.CH(off)]) != 0; +} +/* Scan the given lines and push a mark into ctx.marks for every character + * (or character sequence) which may have a special meaning in inline + * context. Returns 0 on success, or the non-zero result of the PUSH_MARK + * which failed. */ +int md_collect_marks(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, int table_mode) +{ + int i; + int ret = 0; + MD_MARK* mark; + OFF[CODESPAN_MARK_MAXLEN] codespan_last_potential_closers = + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]; + + int codespan_scanned_till_paragraph_end = FALSE; + + for(i = 0; i < n_lines; i++) + { + const(MD_LINE)* line = &lines[i]; + OFF off = line.beg; + OFF line_end = line.end; + + while(true) + { + CHAR ch; + + /* Optimization: Use some loop unrolling to skip non-mark characters. */ + while(off + 3 < line_end && !IS_MARK_CHAR(ctx, off+0) && !IS_MARK_CHAR(ctx, off+1) + && !IS_MARK_CHAR(ctx, off+2) && !IS_MARK_CHAR(ctx, off+3)) + off += 4; + while(off < line_end && !IS_MARK_CHAR(ctx, off+0)) + off++; + + if(off >= line_end) + break; + + ch = ctx.CH(off); + + /* A backslash escape. + * It can go beyond line.end as it may involve escaped new + * line to form a hard break. */ + if(ch == '\\' && off+1 < ctx.size && (ctx.ISPUNCT(off+1) || ctx.ISNEWLINE(off+1))) { + /* Hard-break cannot be on the last line of the block. 
*/ + if(!ctx.ISNEWLINE(off+1) || i+1 < n_lines) + { + ret = PUSH_MARK(ctx, &mark, ch, off, off+2, MD_MARK_RESOLVED); + if (ret != 0) goto abort; + } + off += 2; + continue; + } + + /* A potential emphasis or strong emphasis start/end: a run of '*' or '_'. */ + if(ch == '*' || ch == '_') { + OFF tmp = off+1; + int left_level; /* What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. */ + int right_level; /* What follows: 0 = whitespace; 1 = punctuation; 2 = other char. */ + + while(tmp < line_end && ctx.CH(tmp) == ch) + tmp++; + + if(off == line.beg || ctx.ISUNICODEWHITESPACEBEFORE(off)) + left_level = 0; + else if(ctx.ISUNICODEPUNCTBEFORE(off)) + left_level = 1; + else + left_level = 2; + + if(tmp == line_end || ctx.ISUNICODEWHITESPACE(tmp)) + right_level = 0; + else if(ctx.ISUNICODEPUNCT(tmp)) + right_level = 1; + else + right_level = 2; + + /* Intra-word underscore doesn't have special meaning. Zeroing both + * levels makes the test below skip the mark. */ + if(ch == '_' && left_level == 2 && right_level == 2) { + left_level = 0; + right_level = 0; + } + + if(left_level != 0 || right_level != 0) { + uint flags = 0; + /* The run can close if what precedes it ranks at least as high as what follows, and can open in the mirrored case. */ + if(left_level > 0 && left_level >= right_level) + flags |= MD_MARK_POTENTIAL_CLOSER; + if(right_level > 0 && right_level >= left_level) + flags |= MD_MARK_POTENTIAL_OPENER; + if(left_level == 2 && right_level == 2) + flags |= MD_MARK_EMPH_INTRAWORD; + + /* For "the rule of three" we need to remember the original + * size of the mark (modulo three), before we potentially + * split the mark when being later resolved partially by some + * shorter closer. */ + switch((tmp - off) % 3) + { + case 0: flags |= MD_MARK_EMPH_MOD3_0; break; + case 1: flags |= MD_MARK_EMPH_MOD3_1; break; + case 2: flags |= MD_MARK_EMPH_MOD3_2; break; + default: break; + } + + ret = PUSH_MARK(ctx, &mark, ch, off, tmp, flags); + if (ret != 0) goto abort; + + /* During resolving, multiple asterisks may have to be + * split into independent span start/ends. Consider e.g. + * "**foo* bar*". 
Therefore we push also some empty dummy + * marks to have enough space for that. */ + off++; + while(off < tmp) { + ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0); + if (ret != 0) goto abort; + off++; + } + continue; + } + + off = tmp; + continue; + } + + /* A potential code span start/end. */ + if(ch == '`') { + OFF opener_beg, opener_end; + OFF closer_beg, closer_end; + int is_code_span; + + is_code_span = md_is_code_span(ctx, lines + i, n_lines - i, off, + &opener_beg, &opener_end, &closer_beg, &closer_end, + codespan_last_potential_closers.ptr, + &codespan_scanned_till_paragraph_end); + if(is_code_span) { + ret = PUSH_MARK(ctx, &mark, '`', opener_beg, opener_end, MD_MARK_OPENER | MD_MARK_RESOLVED); + if (ret != 0) goto abort; + ret = PUSH_MARK(ctx, &mark, '`', closer_beg, closer_end, MD_MARK_CLOSER | MD_MARK_RESOLVED); + if (ret != 0) goto abort; /* Below: interconnect the two marks just pushed. */ + ctx.marks[ctx.n_marks-2].next = ctx.n_marks-1; + ctx.marks[ctx.n_marks-1].prev = ctx.n_marks-2; + + off = closer_end; + + /* The code span may end on a later line; advance the current + * line accordingly. */ + while(off > line_end) { + i++; + line++; + line_end = line.end; + } + continue; + } + /* Not a code span: skip the whole opening back-tick run. */ + off = opener_end; + continue; + } + + /* A potential entity start. */ + if(ch == '&') { + ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_OPENER); + if (ret != 0) goto abort; + off++; + continue; + } + + /* A potential entity end. */ + if(ch == ';') { + /* This surely cannot be an entity end unless the previous mark is '&'. */ + if(ctx.n_marks > 0 && ctx.marks[ctx.n_marks-1].ch == '&') + { + ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_CLOSER); + if (ret != 0) goto abort; + } + + off++; + continue; + } + + /* A potential autolink or raw HTML start/end. */ + if(ch == '<') { + int is_autolink; + OFF autolink_end; + int missing_mailto; + + if(!(ctx.parser.flags & MD_FLAG_NOHTMLSPANS)) { + int is_html; + OFF html_end; + + /* Given the nature of the raw HTML, we have to recognize + * it here. 
Doing so later in md_analyze_lt_gt() could + * open can of worms of quadratic complexity. */ + is_html = md_is_html_any(ctx, lines + i, n_lines - i, off, + lines[n_lines-1].end, &html_end); + if(is_html) { + ret = PUSH_MARK(ctx, &mark, '<', off, off, MD_MARK_OPENER | MD_MARK_RESOLVED); + if (ret != 0) goto abort; + ret = PUSH_MARK(ctx, &mark, '>', html_end, html_end, MD_MARK_CLOSER | MD_MARK_RESOLVED); + if (ret != 0) goto abort; /* Below: interconnect the two marks just pushed. */ + ctx.marks[ctx.n_marks-2].next = ctx.n_marks-1; + ctx.marks[ctx.n_marks-1].prev = ctx.n_marks-2; + off = html_end; + + /* The raw HTML may end on a later line; advance the + * current line accordingly. */ + while(off > line_end) { + i++; + line++; + line_end = line.end; + } + continue; + } + } + /* Not raw HTML (or HTML spans are disabled): try an autolink. */ + is_autolink = md_is_autolink(ctx, off, lines[n_lines-1].end, + &autolink_end, &missing_mailto); + if(is_autolink) { /* The opener is recorded as '@' for the e-mail form and as '<' for the URI form. */ + ret = PUSH_MARK(ctx, &mark, (missing_mailto ? '@' : '<'), off, off+1, + MD_MARK_OPENER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK); + if (ret != 0) goto abort; + ret = PUSH_MARK(ctx, &mark, '>', autolink_end-1, autolink_end, + MD_MARK_CLOSER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK); + if (ret != 0) goto abort; + ctx.marks[ctx.n_marks-2].next = ctx.n_marks-1; + ctx.marks[ctx.n_marks-1].prev = ctx.n_marks-2; + off = autolink_end; + continue; + } + + off++; + continue; + } + + /* A potential link or its part ("[" or "![" opener). */ + if(ch == '[' || (ch == '!' && off+1 < line_end && ctx.CH(off+1) == '[')) { + OFF tmp = (ch == '[' ? off+1 : off+2); + ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_POTENTIAL_OPENER); + if (ret != 0) goto abort; + off = tmp; + /* Two dummies to make enough room for the data (destination and + * title) we need to store if it is a link. */ + ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0); + if (ret != 0) goto abort; + ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0); + if (ret != 0) goto abort; + continue; + } + if(ch == ']') { + ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_CLOSER); + if (ret != 0) goto abort; + off++; + continue; + } + + /* A potential permissive e-mail autolink. 
*/ + if(ch == '@') { + if(line.beg + 1 <= off && ctx.ISALNUM(off-1) && + off + 3 < line.end && ctx.ISALNUM(off+1)) + { + ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_OPENER); + if (ret != 0) goto abort; + /* Push a dummy as a reserve for a closer. */ + ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0); + if (ret != 0) goto abort; + } + + off++; + continue; + } + + /* A potential permissive URL autolink: a ':' preceded by a known scheme + * (which itself starts the line or follows whitespace or one of "*_~([") + * and followed by the suffix belonging to that scheme. */ + if(ch == ':') + { + static struct Scheme + { + const(CHAR)* scheme; + SZ scheme_size; + const(CHAR)* suffix; + SZ suffix_size; + } + + static immutable Scheme[] scheme_map = + [ + Scheme("http", 4, "//", 2), + Scheme("https", 5, "//", 2), + Scheme("ftp", 3, "//", 2) + ]; + + int scheme_index; + + for(scheme_index = 0; scheme_index < cast(int) (scheme_map.length); scheme_index++) { + const(CHAR)* scheme = scheme_map[scheme_index].scheme; + const SZ scheme_size = scheme_map[scheme_index].scheme_size; + const(CHAR)* suffix = scheme_map[scheme_index].suffix; + const SZ suffix_size = scheme_map[scheme_index].suffix_size; + + if(line.beg + scheme_size <= off && md_ascii_eq(ctx.STR(off-scheme_size), scheme, scheme_size) && + (line.beg + scheme_size == off || ctx.ISWHITESPACE(off-scheme_size-1) || ctx.ISANYOF(off-scheme_size-1, "*_~([")) && + off + 1 + suffix_size < line.end && md_ascii_eq(ctx.STR(off+1), suffix, suffix_size)) + { + ret = PUSH_MARK(ctx, &mark, ch, off-scheme_size, off+1+suffix_size, MD_MARK_POTENTIAL_OPENER); + if (ret != 0) goto abort; + /* Push a dummy as a reserve for a closer. */ + ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0); + if (ret != 0) goto abort; + off += 1 + suffix_size; + continue; /* NOTE(review): this resumes the scheme loop, not the outer scan loop, so the off++ below still runs after a match -- confirm this is intended. */ + } + } + + off++; + continue; + } + + /* A potential permissive WWW autolink. 
*/ + if(ch == '.') { + if(line.beg + 3 <= off && md_ascii_eq(ctx.STR(off-3), "www", 3) && + (line.beg + 3 == off || ctx.ISWHITESPACE(off-4) || ctx.ISANYOF(off-4, "*_~([")) && + off + 1 < line_end) + { + ret = PUSH_MARK(ctx, &mark, ch, off-3, off+1, MD_MARK_POTENTIAL_OPENER); + if (ret != 0) goto abort; + /* Push a dummy as a reserve for a closer. */ + ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0); + if (ret != 0) goto abort; + off++; + continue; + } + + off++; + continue; + } + + /* A potential table cell boundary (only recognized in table mode). */ + if(table_mode && ch == '|') { + ret = PUSH_MARK(ctx, &mark, ch, off, off+1, 0); + if (ret != 0) goto abort; + off++; + continue; + } + + /* A potential strikethrough start/end: a run of '~' of any length. */ + if(ch == '~') { + OFF tmp = off+1; + + while(tmp < line_end && ctx.CH(tmp) == '~') + tmp++; + + ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER); + if (ret != 0) goto abort; + off = tmp; + continue; + } + + /* A potential equation start/end. */ + if(ch == '$') { + /* We can have at most two consecutive $ signs, + * where two dollar signs signify a display equation. + * Longer runs are skipped without pushing any mark. */ + OFF tmp = off+1; + + while(tmp < line_end && ctx.CH(tmp) == '$') + tmp++; + + if (tmp - off <= 2) + { + ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER); + if (ret != 0) goto abort; + } + off = tmp; + continue; + } + + /* Turn non-trivial whitespace into a single space: a run of more + * than one whitespace character, or a single one other than ' ', + * gets a resolved mark. */ + if(ISWHITESPACE_(ch)) { + OFF tmp = off+1; + + while(tmp < line_end && ctx.ISWHITESPACE(tmp)) + tmp++; + + if(tmp - off > 1 || ch != ' ') + { + ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_RESOLVED); + if (ret != 0) goto abort; + } + + off = tmp; + continue; + } + + /* Null character. */ + if(ch == '\0') { + ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_RESOLVED); + if (ret != 0) goto abort; + off++; + continue; + } + + off++; + } + } + + /* Add a dummy mark at the end of the mark vector to simplify + * process_inlines(). 
*/ + ret = PUSH_MARK(ctx, &mark, 127, ctx.size, ctx.size, MD_MARK_RESOLVED); + if (ret != 0) goto abort; + +abort: + return ret; +} +/* Pair up bracket marks. A potential opener is appended to the + * BRACKET_OPENERS chain; any other bracket mark is matched with the most + * recent opener in that chain (if any) and the pair is queued for + * md_resolve_links(). */ +static void +md_analyze_bracket(MD_CTX* ctx, int mark_index) +{ + /* We cannot really resolve links here as for that we would need + * more context. E.g. a following pair of brackets (reference link), + * or enclosing pair of brackets (if the inner is the link, the outer + * one cannot be.) + * + * Therefore here we only construct a list of matched '[' ']' pairs + * ordered by position of the closer. This allows us to analyze what is + * or is not a link in the right order, from inside to outside in case + * of nested brackets. + * + * The resolving itself is deferred into md_resolve_links(). + */ + + MD_MARK* mark = &ctx.marks[mark_index]; + + if(mark.flags & MD_MARK_POTENTIAL_OPENER) { + md_mark_chain_append(ctx, ctx.BRACKET_OPENERS, mark_index); + return; + } + + if(ctx.BRACKET_OPENERS.tail >= 0) { + /* Pop the opener from the chain. */ + int opener_index = ctx.BRACKET_OPENERS.tail; + MD_MARK* opener = &ctx.marks[opener_index]; + if(opener.prev >= 0) + ctx.marks[opener.prev].next = -1; + else + ctx.BRACKET_OPENERS.head = -1; + ctx.BRACKET_OPENERS.tail = opener.prev; + + /* Interconnect the opener and closer. */ + opener.next = mark_index; + mark.prev = opener_index; + + /* Add the pair into the chain of potential links for md_resolve_links(). + * Note we misuse opener.prev for this as opener.next points to its + * closer. */ + if(ctx.unresolved_link_tail >= 0) + ctx.marks[ctx.unresolved_link_tail].prev = opener_index; + else + ctx.unresolved_link_head = opener_index; + ctx.unresolved_link_tail = opener_index; + opener.prev = -1; + } +} + +/* Forward declaration. 
*/ +static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, + int mark_beg, int mark_end); +/* Walk the chain of bracket pairs starting at ctx.unresolved_link_head and + * decide for each pair whether it forms a link or an image (reference or + * inline). Returns 0 on success, or -1 when one of the link checks reports + * an error (a negative result). */ +static int +md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines) +{ + int opener_index = ctx.unresolved_link_head; + OFF last_link_beg = 0; + OFF last_link_end = 0; + OFF last_img_beg = 0; + OFF last_img_end = 0; + + while(opener_index >= 0) { + MD_MARK* opener = &ctx.marks[opener_index]; + int closer_index = opener.next; + MD_MARK* closer = &ctx.marks[closer_index]; + int next_index = opener.prev; + MD_MARK* next_opener; + MD_MARK* next_closer; + MD_LINK_ATTR attr; + int is_link = FALSE; + + if(next_index >= 0) { + next_opener = &ctx.marks[next_index]; + next_closer = &ctx.marks[next_opener.next]; + } else { + next_opener = null; + next_closer = null; + } + + /* If nested ("[ [ ] ]"), we need to make sure that: + * - The outer does not end inside of (...) belonging to the inner. + * - The outer cannot be link if the inner is link (i.e. not image). + * + * (Note we here analyze from inner to outer as the marks are ordered + * by closer.beg.) + */ + if((opener.beg < last_link_beg && closer.end < last_link_end) || + (opener.beg < last_img_beg && closer.end < last_img_end) || + (opener.beg < last_link_end && opener.ch == '[')) + { + opener_index = next_index; + continue; + } + + if(next_opener != null && next_opener.beg == closer.end) { + if(next_closer.beg > closer.end + 1) { + /* Might be full reference link ("[text][label]"). */ + is_link = md_is_link_reference(ctx, lines, n_lines, next_opener.beg, next_closer.end, &attr); + } else { + /* Might be collapsed reference link ("[text][]"). */ + is_link = md_is_link_reference(ctx, lines, n_lines, opener.beg, closer.end, &attr); + } + + if(is_link < 0) + return -1; + + if(is_link) { + /* Eat the 2nd "[...]". */ + closer.end = next_closer.end; + } + } else { + if(closer.end < ctx.size && ctx.CH(closer.end) == '(') { + /* Might be inline link. 
*/ + OFF inline_link_end = uint.max; + + is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer.end, &inline_link_end, &attr); + if(is_link < 0) + return -1; + + /* Check the closing ')' is not inside an already resolved range + * (i.e. a range with a higher priority), e.g. a code span. */ + if(is_link) { + int i = closer_index + 1; + + while(i < ctx.n_marks) { + MD_MARK* mark = &ctx.marks[i]; + + if(mark.beg >= inline_link_end) + break; + if((mark.flags & (MD_MARK_OPENER | MD_MARK_RESOLVED)) == (MD_MARK_OPENER | MD_MARK_RESOLVED)) { + if(ctx.marks[mark.next].beg >= inline_link_end) { + /* Cancel the link status and release the title if we own it. */ + if(attr.title_needs_free) + free(cast(void*)(attr.title)); + is_link = FALSE; + break; + } + + i = mark.next + 1; /* Skip the whole resolved range. */ + } else { + i++; + } + } + } + + if(is_link) { + /* Eat the "(...)". */ + closer.end = inline_link_end; + } + } + + if(!is_link) { + /* Might be shortcut reference link ("[text]" alone). */ + is_link = md_is_link_reference(ctx, lines, n_lines, opener.beg, closer.end, &attr); + if(is_link < 0) + return -1; + } + } + + if(is_link) { + /* Resolve the brackets as a link. */ + opener.flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; + closer.flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; + + /* If it is a link, we store the destination and title in the two + * dummy marks after the opener. 
*/ + assert(ctx.marks[opener_index+1].ch == 'D'); + ctx.marks[opener_index+1].beg = attr.dest_beg; + ctx.marks[opener_index+1].end = attr.dest_end; + + assert(ctx.marks[opener_index+2].ch == 'D'); + md_mark_store_ptr(ctx, opener_index+2, attr.title); + if(attr.title_needs_free) + md_mark_chain_append(ctx, ctx.PTR_CHAIN, opener_index+2); + ctx.marks[opener_index+2].prev = attr.title_size; /* The prev field of the dummy is reused to carry the title size. */ + + if(opener.ch == '[') { + last_link_beg = opener.beg; + last_link_end = closer.end; + } else { + last_img_beg = opener.beg; + last_img_end = closer.end; + } + + md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index); + } + + opener_index = next_index; + } + + return 0; +} + +/* Analyze whether the mark '&' starts an HTML entity. + * If so, update its flags as well as flags of corresponding closer ';'. */ +static void +md_analyze_entity(MD_CTX* ctx, int mark_index) +{ + MD_MARK* opener = &ctx.marks[mark_index]; + MD_MARK* closer; + OFF off; + + /* Cannot be entity if there is no closer as the next mark. + * (Any other mark in between would mean a strange character which + * cannot be part of the entity.) + * + * So we can do all the work on '&' and do not call this later for the + * closing mark ';'. + */ + if(mark_index + 1 >= ctx.n_marks) + return; + closer = &ctx.marks[mark_index+1]; + if(closer.ch != ';') + return; + + if(md_is_entity(ctx, opener.beg, closer.end, &off)) { + assert(off == closer.end); + + md_resolve_range(ctx, null, mark_index, mark_index+1); + opener.end = closer.end; + } +} +/* Flag the table cell boundary mark as resolved, append it to the + * TABLECELLBOUNDARIES chain and count it. */ +static void +md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index) +{ + MD_MARK* mark = &ctx.marks[mark_index]; + mark.flags |= MD_MARK_RESOLVED; + + md_mark_chain_append(ctx, ctx.TABLECELLBOUNDARIES, mark_index); + ctx.n_table_cell_boundaries++; +} + +/* Split a longer mark into two. The new mark takes the given count of + * characters. May only be called if an adequate number of dummy 'D' marks + * follows. 
+ */ +static int +md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n) +{ + MD_MARK* mark = &ctx.marks[mark_index]; + int new_mark_index = mark_index + (mark.end - mark.beg - n); + MD_MARK* dummy = &ctx.marks[new_mark_index]; + + assert(mark.end - mark.beg > n); + assert(dummy.ch == 'D'); + /* The dummy becomes a copy of the mark covering its last n characters; the original mark keeps the rest. */ + memcpy(dummy, mark, MD_MARK.sizeof); + mark.end -= n; + dummy.beg = mark.end; + + return new_mark_index; +} +/* Analyze an emphasis mark ('*' or '_'). If it can be a closer, pair it with + * the most recent eligible opener, splitting whichever of the two marks is + * longer. Otherwise, if it can be an opener, append it to its opener chain. */ +static void +md_analyze_emph(MD_CTX* ctx, int mark_index) +{ + MD_MARK* mark = &ctx.marks[mark_index]; + MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index); + + /* If we can be a closer, try to resolve with the preceding opener. */ + if(mark.flags & MD_MARK_POTENTIAL_CLOSER) { + MD_MARK* opener = null; + int opener_index; + + if(mark.ch == '*') { + MD_MARKCHAIN*[6] opener_chains; + int i, n_opener_chains; + uint flags = mark.flags; + + /* Apply "rule of three". (This is why we break asterisk opener + * marks into multiple chains.) */ + n_opener_chains = 0; + opener_chains[n_opener_chains++] = ctx.ASTERISK_OPENERS_intraword_mod3_0; + if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2) + opener_chains[n_opener_chains++] = ctx.ASTERISK_OPENERS_intraword_mod3_1; + if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1) + opener_chains[n_opener_chains++] = ctx.ASTERISK_OPENERS_intraword_mod3_2; + opener_chains[n_opener_chains++] = ctx.ASTERISK_OPENERS_extraword_mod3_0; + if(!(flags & MD_MARK_EMPH_INTRAWORD) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2) + opener_chains[n_opener_chains++] = ctx.ASTERISK_OPENERS_extraword_mod3_1; + if(!(flags & MD_MARK_EMPH_INTRAWORD) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1) + opener_chains[n_opener_chains++] = ctx.ASTERISK_OPENERS_extraword_mod3_2; + + /* Opener is the most recent mark from the allowed chains. 
*/ + for(i = 0; i < n_opener_chains; i++) { + if(opener_chains[i].tail >= 0) { + int tmp_index = opener_chains[i].tail; + MD_MARK* tmp_mark = &ctx.marks[tmp_index]; + if(opener == null || tmp_mark.end > opener.end) { + opener_index = tmp_index; + opener = tmp_mark; + } + } + } + } else { + /* Simple emphasis mark: only the mark's own chain is consulted. */ + if(chain.tail >= 0) { + opener_index = chain.tail; + opener = &ctx.marks[opener_index]; + } + } + + /* Resolve, if we have found a matching opener. */ + if(opener != null) { + SZ opener_size = opener.end - opener.beg; + SZ closer_size = mark.end - mark.beg; + /* Make both marks the same length by splitting the longer one. */ + if(opener_size > closer_size) { + opener_index = md_split_emph_mark(ctx, opener_index, closer_size); + md_mark_chain_append(ctx, md_mark_chain(ctx, opener_index), opener_index); + } else if(opener_size < closer_size) { + md_split_emph_mark(ctx, mark_index, closer_size - opener_size); + } + + md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING); + md_resolve_range(ctx, chain, opener_index, mark_index); + return; + } + } + + /* If we could not resolve as a closer, we may yet be an opener. */ + if(mark.flags & MD_MARK_POTENTIAL_OPENER) + md_mark_chain_append(ctx, chain, mark_index); +} +/* Analyze a strikethrough mark (a run of '~'): it closes the span when an + * opener is already waiting in TILDE_OPENERS, otherwise it becomes the opener. */ +static void +md_analyze_tilde(MD_CTX* ctx, int mark_index) +{ + /* We attempt to be GitHub Flavored Markdown compatible here. GFM says + * that length of the tilde sequence is not important at all. Note that + * implies the ctx.TILDE_OPENERS chain can have at most one item. */ + + if(ctx.TILDE_OPENERS.head >= 0) { + /* The chain already contains an opener, so we may resolve the span. */ + int opener_index = ctx.TILDE_OPENERS.head; + + md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING); + md_resolve_range(ctx, ctx.TILDE_OPENERS, opener_index, mark_index); + } else { + /* We can only be opener. 
*/ + md_mark_chain_append(ctx, ctx.TILDE_OPENERS, mark_index); + } +} +/* Analyze an equation mark (a run of one or two '$'): it closes the equation + * when an opener of the same length is waiting in DOLLAR_OPENERS, otherwise + * it becomes the opener. */ +static void +md_analyze_dollar(MD_CTX* ctx, int mark_index) +{ + /* This should mimic the way inline equations work in LaTeX, so there + * can only ever be one item in the chain (i.e. the dollars can't be + * nested). This is basically the same as the md_analyze_tilde function, + * except that we require matching openers and closers to be of the same + * length. + * + * E.g.: $abc$$def$$ => abc (display equation) def (end equation) */ + if(ctx.DOLLAR_OPENERS.head >= 0) { + /* If the potential closer has a non-matching number of $, discard the old opener. */ + MD_MARK* open = &ctx.marks[ctx.DOLLAR_OPENERS.head]; + MD_MARK* close = &ctx.marks[mark_index]; + + int opener_index = ctx.DOLLAR_OPENERS.head; + md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_ALL); + if (open.end - open.beg == close.end - close.beg) { + /* We are the matching closer. */ + md_resolve_range(ctx, ctx.DOLLAR_OPENERS, opener_index, mark_index); + } else { + /* We don't match the opener, so discard the old opener and insert ourselves as an opener. */ + md_mark_chain_append(ctx, ctx.DOLLAR_OPENERS, mark_index); + } + } else { + /* No unmatched openers, so we are an opener. */ + md_mark_chain_append(ctx, ctx.DOLLAR_OPENERS, mark_index); + } +} +/* Analyze a potential permissive URL or WWW autolink whose opener mark is at + * mark_index and whose reserved dummy closer is the mark right after it. + * If a valid domain (and optional path) follows the opener, both marks are + * turned into a resolved zero-length pair enclosing the link text. */ +static void +md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index) +{ + MD_MARK* opener = &ctx.marks[mark_index]; + int closer_index = mark_index + 1; + MD_MARK* closer = &ctx.marks[closer_index]; + MD_MARK* next_resolved_mark; + OFF off = opener.end; + int n_dots = FALSE; /* NOTE(review): used as a counter of '.' below although initialized with FALSE; presumably FALSE is 0 -- confirm. */ + int has_underscore_in_last_seg = FALSE; + int has_underscore_in_next_to_last_seg = FALSE; + int n_opened_parenthesis = 0; + + /* Check for domain. */ + while(off < ctx.size) { + if(ctx.ISALNUM(off) || ctx.CH(off) == '-') { + off++; + } else if(ctx.CH(off) == '.') { + /* We must see at least one period. 
*/ + n_dots++; + has_underscore_in_next_to_last_seg = has_underscore_in_last_seg; + has_underscore_in_last_seg = FALSE; + off++; + } else if(ctx.CH(off) == '_') { + /* No underscore may be present in the last two domain segments. */ + has_underscore_in_last_seg = TRUE; + off++; + } else { + break; + } + } + if(off > opener.end && ctx.CH(off-1) == '.') { /* A trailing '.' is not part of the domain. */ + off--; + n_dots--; + } + if(off <= opener.end || n_dots == 0 || has_underscore_in_next_to_last_seg || has_underscore_in_last_seg) + return; + + /* Check for path; it may extend at most up to the next resolved mark. */ + next_resolved_mark = closer + 1; + while(next_resolved_mark.ch == 'D' || !(next_resolved_mark.flags & MD_MARK_RESOLVED)) + next_resolved_mark++; + while(off < next_resolved_mark.beg && ctx.CH(off) != '<' && !ctx.ISWHITESPACE(off) && !ctx.ISNEWLINE(off)) { + /* Parentheses must be balanced. */ + if(ctx.CH(off) == '(') { + n_opened_parenthesis++; + } else if(ctx.CH(off) == ')') { + if(n_opened_parenthesis > 0) + n_opened_parenthesis--; + else + break; + } + + off++; + } + /* These cannot be the last char. In such a case they are more likely + * normal punctuation. */ + if(ctx.ISANYOF(off-1, "?!.,:*_~")) + off--; + + /* Ok. Let's call it an auto-link. Shrink the opener and set up the closer + * as zero-length marks so all the contents become the link text. */ + assert(closer.ch == 'D'); + opener.end = opener.beg; + closer.ch = opener.ch; + closer.beg = off; + closer.end = off; + md_resolve_range(ctx, null, mark_index, closer_index); +} + +/* The permissive autolinks do not have to be enclosed in '<' '>' but we + * instead impose stricter rules what is understood as an e-mail address + * here. Actually any non-alphanumeric characters with exception of '.' + * are prohibited both in username and after '@'. 
*/ +static void +md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index) +{ + MD_MARK* opener = &ctx.marks[mark_index]; + int closer_index; + MD_MARK* closer; + OFF beg = opener.beg; + OFF end = opener.end; + int dot_count = 0; + + assert(ctx.CH(beg) == '@'); + + /* Scan backwards for the name before '@' (alphanumerics and ".-_+"). */ + while(beg > 0 && (ctx.ISALNUM(beg-1) || ctx.ISANYOF(beg-1, ".-_+"))) + beg--; + + /* Scan for the domain after '@' (alphanumerics and ".-_"), counting dots. */ + while(end < ctx.size && (ctx.ISALNUM(end) || ctx.ISANYOF(end, ".-_"))) { + if(ctx.CH(end) == '.') + dot_count++; + end++; + } + if(ctx.CH(end-1) == '.') { /* A final '.' is not part of it. */ + dot_count--; + end--; + } + else if(ctx.ISANYOF2(end-1, '-', '_')) /* These are forbidden at the end. */ + return; + if(ctx.CH(end-1) == '@' || dot_count == 0) /* Nothing usable after '@', or no dot in the domain. */ + return; + + /* Ok. Let's call it an auto-link. Shrink the opener and set up the closer + * as zero-length marks so all the contents become the link text. */ + closer_index = mark_index + 1; + closer = &ctx.marks[closer_index]; + assert(closer.ch == 'D'); + + opener.beg = beg; + opener.end = beg; + closer.ch = opener.ch; + closer.beg = end; + closer.end = end; + md_resolve_range(ctx, null, mark_index, closer_index); +} +/* Walk the marks in [mark_beg, mark_end), skipping already resolved spans, + * and dispatch every unresolved mark whose character is listed in mark_chars + * to the analyzer for that character. */ +static void +md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, + int mark_beg, int mark_end, const(CHAR)* mark_chars) +{ + int i = mark_beg; + + while(i < mark_end) { + MD_MARK* mark = &ctx.marks[i]; + + /* Skip resolved spans: a resolved opener jumps right past its closer. */ + if(mark.flags & MD_MARK_RESOLVED) { + if(mark.flags & MD_MARK_OPENER) { + assert(i < mark.next); + i = mark.next + 1; + } else { + i++; + } + continue; + } + + /* Skip marks we do not want to deal with. */ + if(!ISANYOF_(mark.ch, mark_chars)) { + i++; + continue; + } + + /* Analyze the mark. */ + switch(mark.ch) { + case '[': /* Pass through. */ + case '!': /* Pass through. */ + case ']': md_analyze_bracket(ctx, i); break; + case '&': md_analyze_entity(ctx, i); break; + case '|': md_analyze_table_cell_boundary(ctx, i); break; + case '_': /* Pass through. 
*/ + case '*': md_analyze_emph(ctx, i); break; + case '~': md_analyze_tilde(ctx, i); break; + case '$': md_analyze_dollar(ctx, i); break; + case '.': /* Pass through. */ + case ':': md_analyze_permissive_url_autolink(ctx, i); break; + case '@': md_analyze_permissive_email_autolink(ctx, i); break; + default: break; + } + + i++; + } +} + +/* Analyze marks (build ctx.marks): collect them and then resolve them group + * by group in order of precedence. Returns a negative value if collecting + * the marks or resolving the links failed. */ +static int +md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode) +{ + int ret; + + /* Reset the previously collected stack of marks. */ + ctx.n_marks = 0; + + /* Collect all marks. */ + ret = (md_collect_marks(ctx, lines, n_lines, table_mode)); + if (ret < 0) goto abort; + + /* We analyze marks in a few groups to handle their precedence. */ + /* (1) Entities; code spans; autolinks; raw HTML. + * (Only '&' is analyzed here; code spans, autolinks and raw HTML were + * already pushed as resolved marks by md_collect_marks().) */ + md_analyze_marks(ctx, lines, n_lines, 0, ctx.n_marks, "&"); + + if(table_mode) { + /* (2) Analyze table cell boundaries. + * Note we reset ctx.TABLECELLBOUNDARIES chain prior to the call md_analyze_marks(), + * not after, because the caller may need it. */ + assert(n_lines == 1); + ctx.TABLECELLBOUNDARIES.head = -1; + ctx.TABLECELLBOUNDARIES.tail = -1; + ctx.n_table_cell_boundaries = 0; + md_analyze_marks(ctx, lines, n_lines, 0, ctx.n_marks, "|"); + return ret; + } + + /* (3) Links. */ + md_analyze_marks(ctx, lines, n_lines, 0, ctx.n_marks, "[]!"); + ret = (md_resolve_links(ctx, lines, n_lines)); + if (ret < 0) goto abort; + ctx.BRACKET_OPENERS.head = -1; + ctx.BRACKET_OPENERS.tail = -1; + ctx.unresolved_link_head = -1; + ctx.unresolved_link_tail = -1; + + /* (4) Emphasis and strong emphasis; permissive autolinks. 
*/ + md_analyze_link_contents(ctx, lines, n_lines, 0, ctx.n_marks); + +abort: + return ret; +} +/* Analyze the emphasis ('*', '_'), strikethrough ('~'), equation ('$') and + * permissive autolink ('@', ':', '.') marks in [mark_beg, mark_end) and then + * reset all the opener chains used by that analysis. */ +static void +md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, + int mark_beg, int mark_end) +{ + md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, "*_~$@:."); + ctx.ASTERISK_OPENERS_extraword_mod3_0.head = -1; + ctx.ASTERISK_OPENERS_extraword_mod3_0.tail = -1; + ctx.ASTERISK_OPENERS_extraword_mod3_1.head = -1; + ctx.ASTERISK_OPENERS_extraword_mod3_1.tail = -1; + ctx.ASTERISK_OPENERS_extraword_mod3_2.head = -1; + ctx.ASTERISK_OPENERS_extraword_mod3_2.tail = -1; + ctx.ASTERISK_OPENERS_intraword_mod3_0.head = -1; + ctx.ASTERISK_OPENERS_intraword_mod3_0.tail = -1; + ctx.ASTERISK_OPENERS_intraword_mod3_1.head = -1; + ctx.ASTERISK_OPENERS_intraword_mod3_1.tail = -1; + ctx.ASTERISK_OPENERS_intraword_mod3_2.head = -1; + ctx.ASTERISK_OPENERS_intraword_mod3_2.tail = -1; + ctx.UNDERSCORE_OPENERS.head = -1; + ctx.UNDERSCORE_OPENERS.tail = -1; + ctx.TILDE_OPENERS.head = -1; + ctx.TILDE_OPENERS.tail = -1; + ctx.DOLLAR_OPENERS.head = -1; + ctx.DOLLAR_OPENERS.tail = -1; +} +/* Build the href and title attributes from dest and title and pass them + * with MD_ENTER_SPAN (when enter is non-zero) or MD_LEAVE_SPAN (otherwise) + * for a span of the given type. Both attribute builds are released before + * returning. Returns 0 on success or the non-zero result of the failed step. */ +static int +md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type, + const(CHAR)* dest, SZ dest_size, int prohibit_escapes_in_dest, + const(CHAR)* title, SZ title_size) +{ + MD_ATTRIBUTE_BUILD href_build = MD_ATTRIBUTE_BUILD.init; + MD_ATTRIBUTE_BUILD title_build = MD_ATTRIBUTE_BUILD.init; + MD_SPAN_A_DETAIL det; + int ret = 0; + + /* Note that here we rely on the fact that MD_SPAN_A_DETAIL and + * MD_SPAN_IMG_DETAIL are binary-compatible. */ + memset(&det, 0, MD_SPAN_A_DETAIL.sizeof); + ret = (md_build_attribute(ctx, dest, dest_size, + (prohibit_escapes_in_dest ? 
MD_BUILD_ATTR_NO_ESCAPES : 0), + &det.href, &href_build)); + if (ret < 0) goto abort; + ret = (md_build_attribute(ctx, title, title_size, 0, &det.title, &title_build)); + if (ret < 0) goto abort; + + if(enter) + { + ret = MD_ENTER_SPAN(ctx, type, &det); + if (ret != 0) goto abort; + } + else + { + ret = MD_LEAVE_SPAN(ctx, type, &det); + if (ret != 0) goto abort; + } + +abort: + md_free_attribute(ctx, &href_build); + md_free_attribute(ctx, &title_build); + return ret; +} + +/* Render the output, accordingly to the analyzed ctx.marks. */ +static int +md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines) +{ + MD_TEXTTYPE text_type; + const(MD_LINE)* line = lines; + MD_MARK* prev_mark = null; + MD_MARK* mark; + OFF off = lines[0].beg; + OFF end = lines[n_lines-1].end; + int enforce_hardbreak = 0; + int ret = 0; + + /* Find first resolved mark. Note there is always at least one resolved + * mark, the dummy last one after the end of the latest line we actually + * never really reach. This saves us of a lot of special checks and cases + * in this function. */ + mark = ctx.marks; + while(!(mark.flags & MD_MARK_RESOLVED)) + mark++; + + text_type = MD_TEXT_NORMAL; + + while(1) { + /* Process the text up to the next mark or end-of-line. */ + OFF tmp = (line.end < mark.beg ? line.end : mark.beg); + if(tmp > off) { + ret = MD_TEXT(ctx, text_type, ctx.STR(off), tmp - off); + if (ret != 0) goto abort; + off = tmp; + } + + /* If reached the mark, process it and move to next one. */ + if(off >= mark.beg) { + switch(mark.ch) { + case '\\': /* Backslash escape. */ + if(ctx.ISNEWLINE(mark.beg+1)) + enforce_hardbreak = 1; + else + { + ret = MD_TEXT(ctx, text_type, ctx.STR(mark.beg+1), 1); + if (ret != 0) goto abort; + } + break; + + case ' ': /* Non-trivial space. */ + ret = MD_TEXT(ctx, text_type, " ", 1); + if (ret != 0) goto abort; + break; + + case '`': /* Code span. 
*/ + if(mark.flags & MD_MARK_OPENER) { + ret = MD_ENTER_SPAN(ctx, MD_SPAN_CODE, null); + if (ret != 0) goto abort; + text_type = MD_TEXT_CODE; + } else { + ret = MD_LEAVE_SPAN(ctx, MD_SPAN_CODE, null); + if (ret != 0) goto abort; + text_type = MD_TEXT_NORMAL; + } + break; + + case '_': + case '*': /* Emphasis, strong emphasis. */ + if(mark.flags & MD_MARK_OPENER) { + if((mark.end - off) % 2) { + ret = MD_ENTER_SPAN(ctx, MD_SPAN_EM, null); + if (ret != 0) goto abort; + off++; + } + while(off + 1 < mark.end) { + ret = MD_ENTER_SPAN(ctx, MD_SPAN_STRONG, null); + if (ret != 0) goto abort; + off += 2; + } + } else { + while(off + 1 < mark.end) { + ret = MD_LEAVE_SPAN(ctx, MD_SPAN_STRONG, null); + if (ret != 0) goto abort; + off += 2; + } + if((mark.end - off) % 2) { + ret = MD_LEAVE_SPAN(ctx, MD_SPAN_EM, null); + if (ret != 0) goto abort; + off++; + } + } + break; + + case '~': + if(mark.flags & MD_MARK_OPENER) + { + ret = MD_ENTER_SPAN(ctx, MD_SPAN_DEL, null); + if (ret != 0) goto abort; + } + else + { + ret = MD_LEAVE_SPAN(ctx, MD_SPAN_DEL, null); + if (ret != 0) goto abort; + } + break; + + case '$': + if(mark.flags & MD_MARK_OPENER) { + ret = MD_ENTER_SPAN(ctx, (mark.end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, null); + if (ret != 0) goto abort; + text_type = MD_TEXT_LATEXMATH; + } else { + ret = MD_LEAVE_SPAN(ctx, (mark.end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, null); + if (ret != 0) goto abort; + text_type = MD_TEXT_NORMAL; + } + break; + + case '[': /* Link, image. */ + case '!': + case ']': + { + const MD_MARK* opener = (mark.ch != ']' ? mark : &ctx.marks[mark.prev]); + const MD_MARK* dest_mark = opener+1; + const MD_MARK* title_mark = opener+2; + + assert(dest_mark.ch == 'D'); + assert(title_mark.ch == 'D'); + + ret = (md_enter_leave_span_a(ctx, (mark.ch != ']') ? 1 : 0, + (opener.ch == '!' ? 
MD_SPAN_IMG : MD_SPAN_A), + ctx.STR(dest_mark.beg), dest_mark.end - dest_mark.beg, FALSE, + cast(char*) md_mark_get_ptr(ctx, cast(int)(title_mark - ctx.marks)), title_mark.prev)); + if (ret < 0) goto abort; + + /* link/image closer may span multiple lines. */ + if(mark.ch == ']') { + while(mark.end > line.end) + line++; + } + + break; + } + + case '<': + case '>': /* Autolink or raw HTML. */ + if(!(mark.flags & MD_MARK_AUTOLINK)) { + /* Raw HTML. */ + if(mark.flags & MD_MARK_OPENER) + text_type = MD_TEXT_HTML; + else + text_type = MD_TEXT_NORMAL; + break; + } + /* Pass through, if auto-link. */ + goto case '.'; + + case '@': /* Permissive e-mail autolink. */ + case ':': /* Permissive URL autolink. */ + case '.': /* Permissive WWW autolink. */ + { + MD_MARK* opener = ((mark.flags & MD_MARK_OPENER) ? mark : &ctx.marks[mark.prev]); + MD_MARK* closer = &ctx.marks[opener.next]; + const(CHAR)* dest = ctx.STR(opener.end); + SZ dest_size = closer.beg - opener.end; + + /* For permissive auto-links we do not know closer mark + * position at the time of md_collect_marks(), therefore + * it can be out-of-order in ctx.marks[]. + * + * With this flag, we make sure that we output the closer + * only if we processed the opener. */ + if(mark.flags & MD_MARK_OPENER) + closer.flags |= MD_MARK_VALIDPERMISSIVEAUTOLINK; + + if(opener.ch == '@' || opener.ch == '.') { + dest_size += 7; + ret = MD_TEMP_BUFFER(ctx, dest_size * CHAR.sizeof); + if (ret < 0) goto abort; + memcpy(ctx.buffer, + (opener.ch == '@' ? "mailto:" : "http://").ptr, + 7 * CHAR.sizeof); + memcpy(ctx.buffer + 7, dest, (dest_size-7) * CHAR.sizeof); + dest = ctx.buffer; + } + + if(closer.flags & MD_MARK_VALIDPERMISSIVEAUTOLINK) + { + ret = (md_enter_leave_span_a(ctx, (mark.flags & MD_MARK_OPENER), + MD_SPAN_A, dest, dest_size, TRUE, null, 0)); + if (ret < 0) goto abort; + } + break; + } + + case '&': /* Entity. 
*/ + ret = MD_TEXT(ctx, MD_TEXT_ENTITY, ctx.STR(mark.beg), mark.end - mark.beg); + if (ret != 0) goto abort; + break; + + case '\0': + ret = MD_TEXT(ctx, MD_TEXT_NULLCHAR, "", 1); + if (ret != 0) goto abort; + break; + + case 127: + goto abort; + + default: + break; + } + + off = mark.end; + + /* Move to next resolved mark. */ + prev_mark = mark; + mark++; + while(!(mark.flags & MD_MARK_RESOLVED) || mark.beg < off) + mark++; + } + + /* If reached end of line, move to next one. */ + if(off >= line.end) { + /* If it is the last line, we are done. */ + if(off >= end) + break; + + if(text_type == MD_TEXT_CODE || text_type == MD_TEXT_LATEXMATH) { + OFF tmp2; + + assert(prev_mark != null); + assert(ISANYOF2_(prev_mark.ch, '`', '$') && (prev_mark.flags & MD_MARK_OPENER)); + assert(ISANYOF2_(mark.ch, '`', '$') && (mark.flags & MD_MARK_CLOSER)); + + /* Inside a code span, trailing line whitespace has to be + * outputted. */ + tmp2 = off; + while(off < ctx.size && ctx.ISBLANK(off)) + off++; + if(off > tmp2) + { + ret = MD_TEXT(ctx, text_type, ctx.STR(tmp2), off-tmp2); + if (ret != 0) goto abort; + } + + /* and new lines are transformed into single spaces. */ + if(prev_mark.end < off && off < mark.beg) + { + ret = MD_TEXT(ctx, text_type, " ", 1); + if (ret != 0) goto abort; + } + } else if(text_type == MD_TEXT_HTML) { + /* Inside raw HTML, we output the new line verbatim, including + * any trailing spaces. */ + OFF tmp2 = off; + + while(tmp2 < end && ctx.ISBLANK(tmp2)) + tmp2++; + if(tmp2 > off) + { + ret = MD_TEXT(ctx, MD_TEXT_HTML, ctx.STR(off), tmp2 - off); + if (ret != 0) goto abort; + } + ret = MD_TEXT(ctx, MD_TEXT_HTML, "\n", 1); + if (ret != 0) goto abort; + } else { + /* Output soft or hard line break. 
*/
+                MD_TEXTTYPE break_type = MD_TEXT_SOFTBR;
+
+                if(text_type == MD_TEXT_NORMAL) {
+                    if(enforce_hardbreak)
+                        break_type = MD_TEXT_BR;
+                    else if((ctx.CH(line.end) == ' ' && ctx.CH(line.end+1) == ' '))
+                        break_type = MD_TEXT_BR;
+                }
+
+                ret = MD_TEXT(ctx, break_type, "\n", 1);
+                if (ret != 0) goto abort;
+            }
+
+            /* Move to the next line. */
+            line++;
+            off = line.beg;
+
+            enforce_hardbreak = 0;
+        }
+    }
+
+abort:
+    return ret;
+}
+
+
+/***************************
+ ***  Processing Tables  ***
+ ***************************/
+/* Derive the alignment of n_align table columns from the underline text
+ * that starts at offset beg, writing one MD_ALIGN value per column into
+ * align_[].
+ *
+ * For every column the next run of '-' is located; a ':' directly before
+ * the run sets bit 0 of the index and a ':' directly after it sets bit 1,
+ * which selects DEFAULT / LEFT / RIGHT / CENTER from align_map[].
+ *
+ * NOTE(review): the search for the next '-' is not bounded by end, so this
+ * presumably relies on n_align never exceeding the number of dash runs in
+ * the underline -- confirm against the caller.
+ */
+void md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align_, int n_align)
+{
+    static immutable MD_ALIGN[] align_map =
+    [
+        MD_ALIGN_DEFAULT,
+        MD_ALIGN_LEFT,
+        MD_ALIGN_RIGHT,
+        MD_ALIGN_CENTER
+    ];
+    OFF off = beg;
+
+    while(n_align > 0) {
+        int index = 0;  /* index into align_map[] */
+
+        while(ctx.CH(off) != '-')
+            off++;
+        if(off > beg  &&  ctx.CH(off-1) == ':')
+            index |= 1;
+        while(off < end  &&  ctx.CH(off) == '-')
+            off++;
+        if(off < end  &&  ctx.CH(off) == ':')
+            index |= 2;
+
+        *align_ = align_map[index];
+        align_++;
+        n_align--;
+    }
+
+}
+/* Emit one table cell of block type cell_type with alignment align_.
+ *
+ * Whitespace is trimmed from both ends of the range beg..end, then the cell
+ * is reported as: enter block, the trimmed text processed as a single
+ * normal line, leave block.
+ *
+ * Returns 0 on success; otherwise the non-zero/negative result of the step
+ * that failed (the remaining steps are skipped).
+ */
+int md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align_, OFF beg, OFF end)
+{
+    MD_LINE line;
+    MD_BLOCK_TD_DETAIL det;
+    int ret = 0;
+
+    while(beg < end  &&  ctx.ISWHITESPACE(beg))
+        beg++;
+    while(end > beg  &&  ctx.ISWHITESPACE(end-1))
+        end--;
+
+    det.align_ = align_;
+    line.beg = beg;
+    line.end = end;
+
+    ret = MD_ENTER_BLOCK(ctx, cell_type, &det);
+    if (ret != 0) goto abort;
+    ret = (md_process_normal_block_contents(ctx, &line, 1));
+    if (ret < 0) goto abort;
+    ret = MD_LEAVE_BLOCK(ctx, cell_type, &det);
+    if (ret != 0) goto abort;
+
+abort:
+    return ret;
+}
+/* Emit one table row (an MD_BLOCK_TR block) for the line beg..end.
+ *
+ * The line is analyzed in table mode to find the cell boundary marks, their
+ * offsets are copied into a malloc()ed array, and then the text before the
+ * first boundary, between consecutive boundaries and after the last one is
+ * emitted as cells of type cell_type using align_[]. At most col_count
+ * cells are emitted; if the row has fewer, empty cells (range 0..0) are
+ * emitted until col_count is reached.
+ *
+ * On every exit path the offset array is freed, and every pointer stored in
+ * the PTR_CHAIN marks is freed and that chain is reset.
+ *
+ * Returns 0 on success, -1 if malloc() fails, otherwise the result of the
+ * step that failed.
+ *
+ * NOTE(review): pipe_offs[0] and pipe_offs[n-1] are read without checking
+ * n, so this presumably relies on callers only passing lines that contain
+ * at least one cell boundary (n >= 1) -- confirm.
+ */
+int md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
+                         const MD_ALIGN* align_, int col_count)
+{
+    MD_LINE line;
+    OFF* pipe_offs = null;
+    int i, j, n;
+    int ret = 0;
+
+    line.beg = beg;
+    line.end = end;
+
+    /* Break the line into table cells by identifying pipe characters which
+     * form the cell boundaries. */
+    ret = (md_analyze_inlines(ctx, &line, 1, TRUE));
+    if (ret < 0) goto abort;
+
+    /* We have to remember the cell boundaries in a local buffer because
+     * ctx.marks[] shall be reused during cell contents processing. */
+    n = ctx.n_table_cell_boundaries;
+    pipe_offs = cast(OFF*) malloc(n * OFF.sizeof);
+    if(pipe_offs == null) {
+        ctx.MD_LOG("malloc() failed.");
+        ret = -1;
+        goto abort;
+    }
+    for(i = ctx.TABLECELLBOUNDARIES.head, j = 0; i >= 0; i = ctx.marks[i].next) {
+        MD_MARK* mark = &ctx.marks[i];
+        pipe_offs[j++] = mark.beg;
+    }
+
+    /* Process cells. */
+    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_TR, null);
+    if (ret != 0) goto abort;
+
+    j = 0;
+    if(beg < pipe_offs[0]  &&  j < col_count)
+    {
+        ret = (md_process_table_cell(ctx, cell_type, align_[j++], beg, pipe_offs[0]));
+        if (ret < 0) goto abort;
+    }
+    for(i = 0; i < n-1  &&  j < col_count; i++)
+    {
+        ret = (md_process_table_cell(ctx, cell_type, align_[j++], pipe_offs[i]+1, pipe_offs[i+1]));
+        if (ret < 0) goto abort;
+    }
+    if(pipe_offs[n-1] < end-1  &&  j < col_count)
+    {
+        ret = (md_process_table_cell(ctx, cell_type, align_[j++], pipe_offs[n-1]+1, end));
+        if (ret < 0) goto abort;
+    }
+    /* Make sure we emit enough table cells even if the current row contains
+     * too few of them. */
+    while(j < col_count)
+    {
+        ret = (md_process_table_cell(ctx, cell_type, align_[j++], 0, 0));
+        if (ret < 0) goto abort;
+    }
+
+    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_TR, null);
+    if (ret != 0) goto abort;
+
+abort:
+    free(pipe_offs);
+
+    /* Free any temporary memory blocks stored within some dummy marks. */
+    for(i = ctx.PTR_CHAIN.head; i >= 0; i = ctx.marks[i].next)
+        free(md_mark_get_ptr(ctx, i));
+    ctx.PTR_CHAIN.head = -1;
+    ctx.PTR_CHAIN.tail = -1;
+
+    return ret;
+}
+/* Emit the contents of a table block with col_count columns.
+ *
+ * lines[0] is emitted as the header row (MD_BLOCK_TH cells inside
+ * MD_BLOCK_THEAD), lines[1] is the underline and only supplies the column
+ * alignments, and lines[2..n_lines-1] are emitted as body rows (MD_BLOCK_TD
+ * cells inside MD_BLOCK_TBODY).
+ *
+ * Returns 0 on success, -1 if the alignment array cannot be allocated,
+ * otherwise the result of the step that failed. The alignment array is
+ * freed on every path.
+ */
+int md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, int n_lines)
+{
+    MD_ALIGN* align_;
+    int i;
+    int ret = 0;
+
+    /* At least two lines have to be present: The column headers and the line
+     * with the underlines. */
+    assert(n_lines >= 2);
+
+    align_ = cast(MD_ALIGN*) malloc(col_count * MD_ALIGN.sizeof);
+    if(align_ == null) {
+        ctx.MD_LOG("malloc() failed.");
+        ret = -1;
+        goto abort;
+    }
+
+    md_analyze_table_alignment(ctx, lines[1].beg, lines[1].end, align_, col_count);
+
+    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_THEAD, null);
+    if (ret != 0) goto abort;
+    ret = (md_process_table_row(ctx, MD_BLOCK_TH,
+                    lines[0].beg, lines[0].end, align_, col_count));
+    if (ret < 0) goto abort;
+    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_THEAD, null);
+    if (ret != 0) goto abort;
+
+    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_TBODY, null);
+    if (ret != 0) goto abort;
+    for(i = 2; i < n_lines; i++) {
+        ret = (md_process_table_row(ctx, MD_BLOCK_TD,
+                        lines[i].beg, lines[i].end, align_, col_count));
+        if (ret < 0) goto abort;
+    }
+    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_TBODY, null);
+    if (ret != 0) goto abort;
+
+abort:
+    free(align_);
+    return ret;
+}
+
+int md_is_table_row(MD_CTX* ctx, OFF beg, OFF* p_end)
+{
+    MD_LINE line;
+    int i;
+    int ret = FALSE;
+
+    line.beg = beg;
+    line.end = beg;
+
+    /* Find end of line. */
+    while(line.end < ctx.size  &&  !ctx.ISNEWLINE(line.end))
+        line.end++;
+
+    ret = (md_analyze_inlines(ctx, &line, 1, TRUE));
+    if (ret < 0) goto abort;
+
+    if(ctx.TABLECELLBOUNDARIES.head >= 0) {
+        if(p_end != null)
+            *p_end = line.end;
+        ret = TRUE;
+    }
+
+abort:
+    /* Free any temporary memory blocks stored within some dummy marks.
*/
+    for(i = ctx.PTR_CHAIN.head; i >= 0; i = ctx.marks[i].next)
+        free(md_mark_get_ptr(ctx, i));
+    ctx.PTR_CHAIN.head = -1;
+    ctx.PTR_CHAIN.tail = -1;
+
+    return ret;
+}
+
+
+/**************************
+ ***  Processing Block  ***
+ **************************/
+
+enum MD_BLOCK_CONTAINER_OPENER = 0x01;
+enum MD_BLOCK_CONTAINER_CLOSER = 0x02;
+enum MD_BLOCK_CONTAINER = (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER);
+enum MD_BLOCK_LOOSE_LIST = 0x04;
+enum MD_BLOCK_SETEXT_HEADER = 0x08;
+
+/* Header record of one block in the block buffer. It is 8 bytes large (see
+ * the static assert below): an 8-bit type, 8-bit flags (MD_BLOCK_xxx bits
+ * above), a 16-bit per-type data value and a 32-bit n_lines value. Each
+ * narrow field is accessed through a getter/setter pair which converts
+ * from/to the wider type used by the rest of the code.
+ */
+struct MD_BLOCK
+{
+nothrow:
+@nogc:
+    ubyte type_;
+    ubyte flags_;
+    ushort data_;
+
+    MD_BLOCKTYPE type() const { return type_; }
+    void type(MD_BLOCKTYPE value) { type_ = cast(ubyte)value; }
+
+    uint flags() const { return flags_; }
+    void flags(uint value) { flags_ = cast(ubyte)value; }
+
+    /* MD_BLOCK_H:      Header level (1 - 6)
+     * MD_BLOCK_CODE:   Non-zero if fenced, zero if indented.
+     * MD_BLOCK_LI:     Task mark character (0 if not task list item, 'x', 'X' or ' ').
+     * MD_BLOCK_TABLE:  Column count (as determined by the table underline).
+     */
+    uint data() const { return data_; }
+    /* Store the full 16 bits which data_ can hold. Casting to ubyte here
+     * would silently drop the upper byte, e.g. turning a table column count
+     * of 256 into 0. */
+    void data(uint value) { data_ = cast(ushort)value; }
+
+    /* Leaf blocks:     Count of lines (MD_LINE or MD_VERBATIMLINE) on the block.
+     * MD_BLOCK_LI:     Task mark offset in the input doc.
+     * MD_BLOCK_OL:     Start item number.
+     */
+    uint n_lines;
+}
+
+static assert(MD_BLOCK.sizeof == 8);
+
+/* State of one container. The two byte-sized flags (is_loose_, is_task_)
+ * are exposed as uint through getter/setter pairs. */
+struct MD_CONTAINER
+{
+nothrow:
+@nogc:
+
+    CHAR ch;
+
+    ubyte is_loose_;
+    ubyte is_task_;
+
+    uint is_loose() { return is_loose_; }
+    void is_loose(uint value) { is_loose_ = cast(ubyte)value; }
+
+    uint is_task() { return is_task_; }
+    void is_task(uint value) { is_task_ = cast(ubyte)value; }
+
+    uint start;
+    uint mark_indent;
+    uint contents_indent;
+    OFF block_byte_off;
+    OFF task_mark_off;
+}
+
+
+/* Analyze the inline marks of the given lines (not in table mode) and then
+ * render them. Whether or not both steps succeed, every pointer stored in
+ * the PTR_CHAIN marks is freed and that chain is reset before returning.
+ *
+ * Returns the result of the last step executed; a negative value from
+ * either step skips the rest.
+ */
+int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+{
+    int i;
+    int ret;
+
+    ret = (md_analyze_inlines(ctx, lines, n_lines, FALSE));
+    if (ret < 0) goto abort;
+    ret = (md_process_inlines(ctx, lines, n_lines));
+    if (ret < 0) goto abort;
+
+abort:
+    /* Free any temporary memory blocks stored within some dummy marks. */
+    for(i = ctx.PTR_CHAIN.head; i >= 0; i = ctx.marks[i].next)
+        free(md_mark_get_ptr(ctx, i));
+    ctx.PTR_CHAIN.head = -1;
+    ctx.PTR_CHAIN.tail = -1;
+
+    return ret;
+}
+
+int md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, int n_lines)
+{
+    static immutable string indent_chunk_str = " ";
+
+    int i;
+    int ret = 0;
+
+    for(i = 0; i < n_lines; i++) {
+        const MD_VERBATIMLINE* line = &lines[i];
+        int indent = line.indent;
+
+        assert(indent >= 0);
+
+        /* Output code indentation. */
+        while(indent > cast(int)(indent_chunk_str.length)) {
+            ret = MD_TEXT(ctx, text_type, indent_chunk_str.ptr, cast(SZ)(indent_chunk_str.length));
+            if (ret != 0) goto abort;
+            indent -= indent_chunk_str.length;
+        }
+        if(indent > 0)
+        {
+            ret = MD_TEXT(ctx, text_type, indent_chunk_str.ptr, indent);
+            if (ret != 0) goto abort;
+        }
+
+        /* Output the code line itself. */
+        ret = MD_TEXT_INSECURE(ctx, text_type, ctx.STR(line.beg), line.end - line.beg);
+        if (ret != 0) goto abort;
+
+        /* Enforce end-of-line.
*/
+        ret = MD_TEXT(ctx, text_type, "\n", 1);
+        if (ret != 0) goto abort;
+    }
+
+abort:
+    return ret;
+}
+/* Emit the lines of a code block as MD_TEXT_CODE text.
+ *
+ * For a fenced block (is_fenced != 0) the first line, which is the opening
+ * fence, is skipped. For an indented block, empty lines (beg == end) at the
+ * start and at the end are dropped.
+ *
+ * Returns 0 if no line remains, otherwise the result of
+ * md_process_verbatim_block_contents().
+ */
+static int
+md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const(MD_VERBATIMLINE)* lines, int n_lines)
+{
+    if(is_fenced) {
+        /* Skip the first line in case of fenced code: It is the fence.
+         * (Only the starting fence is present due to logic in md_analyze_line().) */
+        lines++;
+        n_lines--;
+    } else {
+        /* Ignore blank lines at start/end of indented code block. */
+        while(n_lines > 0  &&  lines[0].beg == lines[0].end) {
+            lines++;
+            n_lines--;
+        }
+        while(n_lines > 0  &&  lines[n_lines-1].beg == lines[n_lines-1].end) {
+            n_lines--;
+        }
+    }
+
+    if(n_lines == 0)
+        return 0;
+
+    return md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, lines, n_lines);
+}
+/* Fill det.info, det.lang and det.fence_char from the opening fence line of
+ * a fenced code block. The fence line is read from the memory directly
+ * following *block.
+ *
+ *  - fence_char: the character at the start of the fence line.
+ *  - info: the text after the run of fence characters, with leading and
+ *    trailing spaces trimmed.
+ *  - lang: the leading part of info up to (not including) its first
+ *    whitespace character.
+ *
+ * Both attributes are built through md_build_attribute() into info_build
+ * and lang_build respectively. Returns 0 on success, or the negative result
+ * of the md_build_attribute() call that failed.
+ */
+int md_setup_fenced_code_detail(MD_CTX* ctx, const(MD_BLOCK)* block, MD_BLOCK_CODE_DETAIL* det,
+                                MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build)
+{
+    const(MD_VERBATIMLINE)* fence_line = cast(const(MD_VERBATIMLINE)*)(block + 1);
+    OFF beg = fence_line.beg;
+    OFF end = fence_line.end;
+    OFF lang_end;
+    CHAR fence_ch = ctx.CH(fence_line.beg);
+    int ret = 0;
+
+    /* Skip the fence itself. */
+    while(beg < ctx.size  &&  ctx.CH(beg) == fence_ch)
+        beg++;
+    /* Trim initial spaces. */
+    while(beg < ctx.size  &&  ctx.CH(beg) == ' ')
+        beg++;
+
+    /* Trim trailing spaces. */
+    while(end > beg  &&  ctx.CH(end-1) == ' ')
+        end--;
+
+    /* Build info string attribute. */
+    ret = (md_build_attribute(ctx, ctx.STR(beg), end - beg, 0, &det.info, info_build));
+    if (ret < 0) goto abort;
+
+    /* Build lang string attribute: the info string up to its first
+     * whitespace character. */
+    lang_end = beg;
+    while(lang_end < end  &&  !ctx.ISWHITESPACE(lang_end))
+        lang_end++;
+    ret = (md_build_attribute(ctx, ctx.STR(beg), lang_end - beg, 0, &det.lang, lang_build));
+    if (ret < 0) goto abort;
+
+    det.fence_char = fence_ch;
+
+abort:
+    return ret;
+}
+
+static int
+md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
+{
+    static union HeaderOrCode
+    {
+        MD_BLOCK_H_DETAIL header;
+        MD_BLOCK_CODE_DETAIL code;
+    }
+    HeaderOrCode det;
+    MD_ATTRIBUTE_BUILD info_build;
+    MD_ATTRIBUTE_BUILD lang_build;
+    int is_in_tight_list;
+    int clean_fence_code_detail = FALSE;
+    int ret = 0;
+
+    memset(&det, 0, det.sizeof);
+
+    if(ctx.n_containers == 0)
+        is_in_tight_list = FALSE;
+    else
+        is_in_tight_list = !ctx.containers[ctx.n_containers-1].is_loose;
+
+    switch(block.type)
+    {
+        case MD_BLOCK_H:
+            det.header.level = block.data;
+            break;
+
+        case MD_BLOCK_CODE:
+            /* For fenced code block, we may need to set the info string. */
+            if(block.data != 0) {
+                memset(&det.code, 0, MD_BLOCK_CODE_DETAIL.sizeof);
+                clean_fence_code_detail = TRUE;
+                ret = (md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build));
+                if (ret < 0) goto abort;
+            }
+            break;
+
+        default:
+            /* Noop. */
+            break;
+    }
+
+    if(!is_in_tight_list  ||  block.type != MD_BLOCK_P)
+    {
+        ret = MD_ENTER_BLOCK(ctx, block.type, cast(void*) &det);
+        if (ret != 0) goto abort;
+    }
+
+    /* Process the block contents according to its type.
*/ + switch(block.type) { + case MD_BLOCK_HR: + /* noop */ + break; + + case MD_BLOCK_CODE: + ret = (md_process_code_block_contents(ctx, (block.data != 0), + cast(const(MD_VERBATIMLINE)*)(block + 1), block.n_lines)); + if (ret < 0) goto abort; + break; + + case MD_BLOCK_HTML: + ret = (md_process_verbatim_block_contents(ctx, MD_TEXT_HTML, + cast(const(MD_VERBATIMLINE)*)(block + 1), block.n_lines)); + if (ret < 0) goto abort; + break; + + case MD_BLOCK_TABLE: + ret = (md_process_table_block_contents(ctx, block.data, + cast(const(MD_LINE)*)(block + 1), block.n_lines)); + if (ret < 0) goto abort; + break; + + default: + ret = (md_process_normal_block_contents(ctx, + cast(const(MD_LINE)*)(block + 1), block.n_lines)); + if (ret < 0) goto abort; + break; + } + + if(!is_in_tight_list || block.type != MD_BLOCK_P) + { + ret = MD_LEAVE_BLOCK(ctx, block.type, cast(void*) &det); + if (ret != 0) goto abort; + } + +abort: + if(clean_fence_code_detail) { + md_free_attribute(ctx, &info_build); + md_free_attribute(ctx, &lang_build); + } + return ret; +} + +int md_process_all_blocks(MD_CTX* ctx) +{ + int byte_off = 0; + int ret = 0; + + /* ctx.containers now is not needed for detection of lists and list items + * so we reuse it for tracking what lists are loose or tight. We rely + * on the fact the vector is large enough to hold the deepest nesting + * level of lists. */ + ctx.n_containers = 0; + + while(byte_off < ctx.n_block_bytes) { + MD_BLOCK* block = cast(MD_BLOCK*)(cast(char*)ctx.block_bytes + byte_off); + static union Det + { + MD_BLOCK_UL_DETAIL ul; + MD_BLOCK_OL_DETAIL ol; + MD_BLOCK_LI_DETAIL li; + } + + Det det; + + switch(block.type) { + case MD_BLOCK_UL: + det.ul.is_tight = (block.flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE; + det.ul.mark = cast(CHAR) block.data; + break; + + case MD_BLOCK_OL: + det.ol.start = block.n_lines; + det.ol.is_tight = (block.flags & MD_BLOCK_LOOSE_LIST) ? 
FALSE : TRUE; + det.ol.mark_delimiter = cast(CHAR) block.data; + break; + + case MD_BLOCK_LI: + det.li.is_task = (block.data != 0); + det.li.task_mark = cast(CHAR) block.data; + det.li.task_mark_offset = cast(OFF) block.n_lines; + break; + + default: + /* noop */ + break; + } + + if(block.flags & MD_BLOCK_CONTAINER) { + if(block.flags & MD_BLOCK_CONTAINER_CLOSER) { + ret = MD_LEAVE_BLOCK(ctx, block.type, &det); + if (ret != 0) goto abort; + + if(block.type == MD_BLOCK_UL || block.type == MD_BLOCK_OL || block.type == MD_BLOCK_QUOTE) + ctx.n_containers--; + } + + if(block.flags & MD_BLOCK_CONTAINER_OPENER) { + ret = MD_ENTER_BLOCK(ctx, block.type, &det); + if (ret != 0) goto abort; + + if(block.type == MD_BLOCK_UL || block.type == MD_BLOCK_OL) { + ctx.containers[ctx.n_containers].is_loose = (block.flags & MD_BLOCK_LOOSE_LIST); + ctx.n_containers++; + } else if(block.type == MD_BLOCK_QUOTE) { + /* This causes that any text in a block quote, even if + * nested inside a tight list item, is wrapped with + *

    ...

    . */ + ctx.containers[ctx.n_containers].is_loose = TRUE; + ctx.n_containers++; + } + } + } else { + ret = (md_process_leaf_block(ctx, block)); + if (ret < 0) goto abort; + + if(block.type == MD_BLOCK_CODE || block.type == MD_BLOCK_HTML) + byte_off += block.n_lines * MD_VERBATIMLINE.sizeof; + else + byte_off += block.n_lines * MD_LINE.sizeof; + } + + byte_off += MD_BLOCK.sizeof; + } + + ctx.n_block_bytes = 0; + +abort: + return ret; +} + + +/************************************ + *** Grouping Lines into Blocks *** + ************************************/ + +static void* +md_push_block_bytes(MD_CTX* ctx, int n_bytes) +{ + void* ptr; + + if(ctx.n_block_bytes + n_bytes > ctx.alloc_block_bytes) { + void* new_block_bytes; + + ctx.alloc_block_bytes = (ctx.alloc_block_bytes > 0 ? ctx.alloc_block_bytes * 2 : 512); + new_block_bytes = realloc_safe(ctx.block_bytes, ctx.alloc_block_bytes); + if(new_block_bytes == null) { + ctx.MD_LOG("realloc() failed."); + return null; + } + + /* Fix the .current_block after the reallocation. 
*/
+        if(ctx.current_block != null) {
+            OFF off_current_block = cast(uint)( cast(char*) ctx.current_block - cast(char*) ctx.block_bytes );
+            ctx.current_block = cast(MD_BLOCK*) (cast(char*) new_block_bytes + off_current_block);
+        }
+
+        ctx.block_bytes = new_block_bytes;
+    }
+
+    ptr = cast(char*)ctx.block_bytes + ctx.n_block_bytes;
+    ctx.n_block_bytes += n_bytes;
+    return ptr;
+}
+/* Append a new MD_BLOCK header to the block buffer and make it
+ * ctx.current_block. The block type is derived from line.type, its flags
+ * and line count start at 0 and its data is copied from line.data.
+ *
+ * Must only be called while no block is being built. Blank lines, setext
+ * underlines and table underlines cannot start a block (assert).
+ *
+ * Returns 0 on success, -1 if the block buffer could not provide the bytes.
+ */
+static int
+md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line)
+{
+    MD_BLOCK* block;
+
+    assert(ctx.current_block == null);
+
+    block = cast(MD_BLOCK*) md_push_block_bytes(ctx, MD_BLOCK.sizeof);
+    if(block == null)
+        return -1;
+
+    switch(line.type) {
+        case MD_LINE_HR:
+            block.type = MD_BLOCK_HR;
+            break;
+
+        case MD_LINE_ATXHEADER:
+        case MD_LINE_SETEXTHEADER:
+            block.type = MD_BLOCK_H;
+            break;
+
+        case MD_LINE_FENCEDCODE:
+        case MD_LINE_INDENTEDCODE:
+            block.type = MD_BLOCK_CODE;
+            break;
+
+        case MD_LINE_TEXT:
+            block.type = MD_BLOCK_P;
+            break;
+
+        case MD_LINE_HTML:
+            block.type = MD_BLOCK_HTML;
+            break;
+
+        case MD_LINE_BLANK:
+        case MD_LINE_SETEXTUNDERLINE:
+        case MD_LINE_TABLEUNDERLINE:
+        default:
+            assert(false);
+    }
+
+    block.flags = 0;
+    block.data = line.data;
+    block.n_lines = 0;
+
+    ctx.current_block = block;
+    return 0;
+}
+
+/* Eat from start of current (textual) block any reference definitions and
+ * remember them so we can resolve any links referring to them.
+ *
+ * (Reference definitions can only be at start of it as they cannot break
+ * a paragraph.)
+ *
+ * If every line of the block is consumed, the whole block is removed from
+ * the block buffer and ctx.current_block is set to null.
+ *
+ * Returns 0 on success, -1 if md_is_link_reference_definition() reports a
+ * failure (a negative line count).
+ */
+int md_consume_link_reference_definitions(MD_CTX* ctx)
+{
+    MD_LINE* lines = cast(MD_LINE*) (ctx.current_block + 1);
+    int n_lines = ctx.current_block.n_lines;
+    int n = 0;
+
+    /* Compute how many lines at the start of the block form one or more
+     * reference definitions. */
+    while(n < n_lines) {
+        int n_link_ref_lines;
+
+        n_link_ref_lines = md_is_link_reference_definition(ctx,
+                                    lines + n, n_lines - n);
+        /* Not a reference definition? */
+        if(n_link_ref_lines == 0)
+            break;
+
+        /* We fail if it is the ref. def. but it could not be stored due
+         * to a memory allocation error. */
+        if(n_link_ref_lines < 0)
+            return -1;
+
+        n += n_link_ref_lines;
+    }
+
+    /* If there was at least one reference definition, we need to remove
+     * its lines from the block, or perhaps even the whole block. */
+    if(n > 0) {
+        if(n == n_lines) {
+            /* Remove complete block. */
+            ctx.n_block_bytes -= n * MD_LINE.sizeof;
+            ctx.n_block_bytes -= MD_BLOCK.sizeof;
+            ctx.current_block = null;
+        } else {
+            /* Remove just some initial lines from the block. */
+            memmove(lines, lines + n, (n_lines - n) * MD_LINE.sizeof);
+            ctx.current_block.n_lines -= n;
+            ctx.n_block_bytes -= n * MD_LINE.sizeof;
+        }
+    }
+
+    return 0;
+}
+/* Finish the block currently being built, if there is one.
+ *
+ * For a paragraph or setext header whose first line starts with '[', any
+ * leading link reference definitions are consumed first; this may remove
+ * the whole block. For a setext header the trailing underline line is then
+ * dropped -- unless it is the only line left, in which case the block is
+ * turned into a paragraph and stays open (ctx.current_block is kept).
+ *
+ * Returns 0 on success or the negative result of
+ * md_consume_link_reference_definitions().
+ */
+static int
+md_end_current_block(MD_CTX* ctx)
+{
+    int ret = 0;
+
+    if(ctx.current_block == null)
+        return ret;
+
+    /* Check whether there is a reference definition. (We do this here instead
+     * of in md_analyze_line() because reference definition can take multiple
+     * lines.) */
+    if(ctx.current_block.type == MD_BLOCK_P  ||
+       (ctx.current_block.type == MD_BLOCK_H  &&  (ctx.current_block.flags & MD_BLOCK_SETEXT_HEADER)))
+    {
+        MD_LINE* lines = cast(MD_LINE*) (ctx.current_block + 1);
+        if(ctx.CH(lines[0].beg) == '[') {
+            ret = (md_consume_link_reference_definitions(ctx));
+            if (ret < 0) goto abort;
+            if(ctx.current_block == null)
+                return ret;
+        }
+    }
+
+    if(ctx.current_block.type == MD_BLOCK_H  &&  (ctx.current_block.flags & MD_BLOCK_SETEXT_HEADER)) {
+        int n_lines = ctx.current_block.n_lines;
+
+        if(n_lines > 1) {
+            /* Get rid of the underline. */
+            ctx.current_block.n_lines--;
+            ctx.n_block_bytes -= MD_LINE.sizeof;
+        } else {
+            /* Only the underline is left after eating the ref. defs.
+             * Keep the line as beginning of a new ordinary paragraph. */
+            ctx.current_block.type = MD_BLOCK_P;
+            return 0;
+        }
+    }
+
+    /* Mark we are not building any block anymore. */
+    ctx.current_block = null;
+
+abort:
+    return ret;
+}
+/* Append the analyzed line to the block being built and bump its n_lines.
+ * Code and HTML blocks store an MD_VERBATIMLINE (indent, beg, end); every
+ * other block type stores an MD_LINE (beg, end).
+ *
+ * Returns 0 on success, -1 if the block buffer could not provide the bytes.
+ */
+static int
+md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
+{
+    assert(ctx.current_block != null);
+
+    if(ctx.current_block.type == MD_BLOCK_CODE || ctx.current_block.type == MD_BLOCK_HTML) {
+        MD_VERBATIMLINE* line;
+
+        line = cast(MD_VERBATIMLINE*) md_push_block_bytes(ctx, MD_VERBATIMLINE.sizeof);
+        if(line == null)
+            return -1;
+
+        line.indent = analysis.indent;
+        line.beg = analysis.beg;
+        line.end = analysis.end;
+    } else {
+        MD_LINE* line;
+
+        line = cast(MD_LINE*) md_push_block_bytes(ctx, MD_LINE.sizeof);
+        if(line == null)
+            return -1;
+
+        line.beg = analysis.beg;
+        line.end = analysis.end;
+    }
+    ctx.current_block.n_lines++;
+
+    return 0;
+}
+/* End the current block (if any) and append a stand-alone MD_BLOCK record
+ * with the given type, flags and data to the block buffer; start is stored
+ * in the record's n_lines field.
+ *
+ * Returns 0 on success, the negative result of md_end_current_block(), or
+ * -1 if the block buffer could not provide the bytes.
+ */
+static int
+md_push_container_bytes(MD_CTX* ctx, MD_BLOCKTYPE type, uint start,
+                        uint data, uint flags)
+{
+    MD_BLOCK* block;
+    int ret = 0;
+
+    ret = (md_end_current_block(ctx));
+    if (ret < 0) goto abort;
+
+    block = cast(MD_BLOCK*) md_push_block_bytes(ctx, MD_BLOCK.sizeof);
+    if(block == null)
+        return -1;
+
+    block.type = type;
+    block.flags = flags;
+    block.data = data;
+    block.n_lines = start;
+
+abort:
+    return ret;
+}
+
+
+
+/***********************
+ ***  Line Analysis  ***
+ ***********************/
+
+static int
+md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
+{
+    OFF off = beg + 1;
+    int n = 1;
+
+    while(off < ctx.size  &&  (ctx.CH(off) == ctx.CH(beg) || ctx.CH(off) == ' ' || ctx.CH(off) == '\t')) {
+        if(ctx.CH(off) == ctx.CH(beg))
+            n++;
+        off++;
+    }
+
+    if(n < 3) {
+        *p_killer = off;
+        return FALSE;
+    }
+
+    /* Nothing else can be present on the line.
*/
+    if(off < ctx.size  &&  !ctx.ISNEWLINE(off)) {
+        *p_killer = off;
+        return FALSE;
+    }
+
+    *p_end = off;
+    return TRUE;
+}
+/* Check whether an ATX header starts at beg.
+ *
+ * The run of '#' is counted starting with the character at beg itself
+ * (which is not re-checked here); more than 6 of them is not a header.
+ * Unless MD_FLAG_PERMISSIVEATXHEADERS is set, the run must be followed by
+ * a space, a tab, a new line or the end of input.
+ *
+ * On success returns TRUE with *p_level set to the number of '#' and both
+ * *p_beg and *p_end set to the offset after the spaces following the run.
+ * Note *p_level is written even when the space requirement then fails.
+ */
+static int
+md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, uint* p_level)
+{
+    int n;
+    OFF off = beg + 1;
+
+    while(off < ctx.size  &&  ctx.CH(off) == '#'  &&  off - beg < 7)
+        off++;
+    n = off - beg;
+
+    if(n > 6)
+        return FALSE;
+    *p_level = n;
+
+    if(!(ctx.parser.flags & MD_FLAG_PERMISSIVEATXHEADERS)  &&  off < ctx.size  &&
+       ctx.CH(off) != ' '  &&  ctx.CH(off) != '\t'  &&  !ctx.ISNEWLINE(off))
+        return FALSE;
+
+    while(off < ctx.size  &&  ctx.CH(off) == ' ')
+        off++;
+    *p_beg = off;
+    *p_end = off;
+    return TRUE;
+}
+/* Check whether the line at beg is a setext underline: a run of the
+ * character found at beg, optionally followed by spaces, and nothing else
+ * up to the end of the line.
+ *
+ * On success returns TRUE with *p_end set to the end of the line and
+ * *p_level set to 1 if the character is '=', otherwise 2.
+ */
+static int
+md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, uint* p_level)
+{
+    OFF off = beg + 1;
+
+    while(off < ctx.size  &&  ctx.CH(off) == ctx.CH(beg))
+        off++;
+
+    /* Optionally, space(s) can follow. */
+    while(off < ctx.size  &&  ctx.CH(off) == ' ')
+        off++;
+
+    /* But nothing more is allowed on the line. */
+    if(off < ctx.size  &&  !ctx.ISNEWLINE(off))
+        return FALSE;
+
+    *p_level = (ctx.CH(beg) == '=' ? 1 : 2);
+    *p_end = off;
+    return TRUE;
+}
+/* Check whether the line at beg is a table underline.
+ *
+ * The line may start with '|'. It then has to consist of cell underlines,
+ * each at least three characters long and formed by an optional ':', a run
+ * of '-' and an optional ':'. Cells are separated by '|' (whitespace
+ * around the pipe is skipped); the pipe after the last cell is optional.
+ * At least one '|' has to be present somewhere on the line.
+ *
+ * On success returns TRUE with *p_end set to the end of the line and
+ * *p_col_count set to the number of cell underlines found.
+ */
+int md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, uint* p_col_count)
+{
+    OFF off = beg;
+    int found_pipe = FALSE;
+    uint col_count = 0;
+
+    if(off < ctx.size  &&  ctx.CH(off) == '|') {
+        found_pipe = TRUE;
+        off++;
+        while(off < ctx.size  &&  ctx.ISWHITESPACE(off))
+            off++;
+    }
+
+    while(1) {
+        OFF cell_beg;
+        int delimited = FALSE;
+
+        /* Cell underline ("-----", ":----", "----:" or ":----:") */
+        cell_beg = off;
+        if(off < ctx.size  &&  ctx.CH(off) == ':')
+            off++;
+        while(off < ctx.size  &&  ctx.CH(off) == '-')
+            off++;
+        if(off < ctx.size  &&  ctx.CH(off) == ':')
+            off++;
+        if(off - cell_beg < 3)
+            return FALSE;
+
+        col_count++;
+
+        /* Pipe delimiter (optional at the end of line). */
+        while(off < ctx.size  &&  ctx.ISWHITESPACE(off))
+            off++;
+        if(off < ctx.size  &&  ctx.CH(off) == '|') {
+            delimited = TRUE;
+            found_pipe =  TRUE;
+            off++;
+            while(off < ctx.size  &&  ctx.ISWHITESPACE(off))
+                off++;
+        }
+
+        /* Success, if we reach end of line. */
+        if(off >= ctx.size  ||  ctx.ISNEWLINE(off))
+            break;
+
+        if(!delimited)
+            return FALSE;
+    }
+
+    if(!found_pipe)
+        return FALSE;
+
+    *p_end = off;
+    *p_col_count = col_count;
+    return TRUE;
+}
+/* Check whether the line at beg is an opening code fence: a run of at
+ * least three copies of the character found at beg, optionally followed by
+ * spaces and an info string. If the fence character is '`', the rest of
+ * the line must not contain another '`'.
+ *
+ * The length of the run is stored in ctx.code_fence_length as soon as it
+ * is known to be at least three, i.e. also when the line is rejected
+ * afterwards because of a '`' in the info string.
+ *
+ * On success returns TRUE with *p_end set to the end of the line.
+ */
+static int
+md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
+{
+    OFF off = beg;
+
+    while(off < ctx.size && ctx.CH(off) == ctx.CH(beg))
+        off++;
+
+    /* Fence must have at least three characters. */
+    if(off - beg < 3)
+        return FALSE;
+
+    ctx.code_fence_length = off - beg;
+
+    /* Optionally, space(s) can follow. */
+    while(off < ctx.size  &&  ctx.CH(off) == ' ')
+        off++;
+
+    /* Optionally, an info string can follow. */
+    while(off < ctx.size  &&  !ctx.ISNEWLINE(off)) {
+        /* Backtick-based fence must not contain '`' in the info string. */
+        if(ctx.CH(beg) == '`'  &&  ctx.CH(off) == '`')
+            return FALSE;
+        off++;
+    }
+
+    *p_end = off;
+    return TRUE;
+}
+/* Check whether the line at beg closes the code fence opened with character
+ * ch: a run of ch at least ctx.code_fence_length long, optionally followed
+ * by spaces and nothing else up to the end of the line.
+ *
+ * Returns TRUE or FALSE; *p_end is set to the offset where scanning stopped
+ * in both cases.
+ */
+static int
+md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end)
+{
+    OFF off = beg;
+    int ret = FALSE;
+
+    /* Closing fence must have at least the same length and use same char as
+     * opening one. */
+    while(off < ctx.size  &&  ctx.CH(off) == ch)
+        off++;
+    if(off - beg < ctx.code_fence_length)
+        goto out_;
+
+    /* Optionally, space(s) can follow */
+    while(off < ctx.size  &&  ctx.CH(off) == ' ')
+        off++;
+
+    /* But nothing more is allowed on the line. */
+    if(off < ctx.size  &&  !ctx.ISNEWLINE(off))
+        goto out_;
+
+    ret = TRUE;
+
+out_:
+    /* Note we set *p_end even on failure: If we are not closing fence, caller
+     * would eat the line anyway without any parsing. */
+    *p_end = off;
+    return ret;
+}
+
+/* Returns type of the raw HTML block, or FALSE if it is not HTML block.
+ * (Refer to CommonMark specification for details about the types.)
+ */ +int md_is_html_block_start_condition(MD_CTX* ctx, OFF beg) +{ + /* Type 6 is started by a long list of allowed tags. We use two-level + * tree to speed-up the search. */ + + static immutable string Xend = null; + static immutable string[] t1 = [ "script", "pre", "style", Xend ]; + static immutable string[] a6 = [ "address", "article", "aside", Xend ]; + static immutable string[] b6 = [ "base", "basefont", "blockquote", "body", Xend ]; + static immutable string[] c6 = [ "caption", "center", "col", "colgroup", Xend ]; + static immutable string[] d6 = [ "dd", "details", "dialog", "dir", + "div", "dl", "dt", Xend ]; + static immutable string[] f6 = [ "fieldset", "figcaption", "figure", "footer", + "form", "frame", "frameset", Xend ]; + static immutable string[] h6 = [ "h1", "head", "header", "hr", "html", Xend ]; + static immutable string[] i6 = [ "iframe", Xend ]; + static immutable string[] l6 = [ "legend", "li", "link", Xend ]; + static immutable string[] m6 = [ "main", "menu", "menuitem", Xend ]; + static immutable string[] n6 = [ "nav", "noframes", Xend ]; + static immutable string[] o6 = [ "ol", "optgroup", "option", Xend ]; + static immutable string[] p6 = [ "p", "param", Xend ]; + static immutable string[] s6 = [ "section", "source", "summary", Xend ]; + static immutable string[] t6 = [ "table", "tbody", "td", "tfoot", "th", + "thead", "title", "tr", "track", Xend ]; + static immutable string[] u6 = [ "ul", Xend ]; + static immutable string[] xx = [ Xend ]; + + immutable(string)*[26] map6; + map6[0] = a6.ptr; + map6[1] = b6.ptr; + map6[2] = c6.ptr; + map6[3] = d6.ptr; + map6[4] = xx.ptr; + map6[5] = f6.ptr; + map6[6] = xx.ptr; + map6[7] = h6.ptr; + map6[8] = i6.ptr; + map6[9] = xx.ptr; + map6[10] = xx.ptr; + map6[11] = l6.ptr; + map6[12] = m6.ptr; + map6[13] = n6.ptr; + map6[14] = o6.ptr; + map6[15] = p6.ptr; + map6[16] = xx.ptr; + map6[17] = xx.ptr; + map6[18] = s6.ptr; + map6[19] = t6.ptr; + map6[20] = u6.ptr; + map6[21] = xx.ptr; + map6[22] = xx.ptr; + 
map6[23] = xx.ptr; + map6[24] = xx.ptr; + map6[25] = xx.ptr; + + OFF off = beg + 1; + int i; + + /* Check for type 1: = ctx.size) + return 6; + if(ctx.ISBLANK(tmp) || ctx.ISNEWLINE(tmp) || ctx.CH(tmp) == '>') + return 6; + if(tmp+1 < ctx.size && ctx.CH(tmp) == '/' && ctx.CH(tmp+1) == '>') + return 6; + break; + } + } + } + } + + /* Check for type 7: any COMPLETE other opening or closing tag. */ + if(off + 1 < ctx.size) { + OFF end; + + if(md_is_html_tag(ctx, null, 0, beg, ctx.size, &end)) { + /* Only optional whitespace and new line may follow. */ + while(end < ctx.size && ctx.ISWHITESPACE(end)) + end++; + if(end >= ctx.size || ctx.ISNEWLINE(end)) + return 7; + } + } + + return FALSE; +} + +/* Case sensitive check whether there is a substring 'what' between 'beg' + * and end of line. */ +static int +md_line_contains(MD_CTX* ctx, OFF beg, const(CHAR)* what, SZ what_len, OFF* p_end) +{ + OFF i; + for(i = beg; i + what_len < ctx.size; i++) { + if(ctx.ISNEWLINE(i)) + break; + if(memcmp(ctx.STR(i), what, what_len * CHAR.sizeof) == 0) { + *p_end = i + what_len; + return TRUE; + } + } + + *p_end = i; + return FALSE; +} + +/* Returns type of HTML block end condition or FALSE if not an end condition. + * + * Note it fills p_end even when it is not end condition as the caller + * does not need to analyze contents of a raw HTML block. + */ +int md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end) +{ + switch(ctx.html_block_type) { + case 1: + { + OFF off = beg; + + while(off < ctx.size && !ctx.ISNEWLINE(off)) { + if(ctx.CH(off) == '<') { + if(md_ascii_case_eq(ctx.STR(off), "", 9)) { + *p_end = off + 9; + return TRUE; + } + + if(md_ascii_case_eq(ctx.STR(off), "", 8)) { + *p_end = off + 8; + return TRUE; + } + + if(md_ascii_case_eq(ctx.STR(off), "", 6)) { + *p_end = off + 6; + return TRUE; + } + } + + off++; + } + *p_end = off; + return FALSE; + } + + case 2: + return (md_line_contains(ctx, beg, "-->", 3, p_end) ? 
2 : FALSE);

        case 3:
            return (md_line_contains(ctx, beg, "?>", 2, p_end) ? 3 : FALSE);

        case 4:
            return (md_line_contains(ctx, beg, ">", 1, p_end) ? 4 : FALSE);

        case 5:
            return (md_line_contains(ctx, beg, "]]>", 3, p_end) ? 5 : FALSE);

        case 6:     /* Pass through */
        case 7:
            *p_end = beg;
            return (ctx.ISNEWLINE(beg) ? ctx.html_block_type : FALSE);

        default:
            assert(false);
    }
}


/* Tell whether a freshly detected 'container' mark can continue the list
 * represented by 'pivot' as another item of it.
 *
 * Returns FALSE for block quotes, for a different mark character, and for
 * a mark indented beyond the pivot's contents indentation.
 */
static int
md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container)
{
    /* Block quote has no "items" like lists. */
    if(container.ch == '>')
        return FALSE;

    if(container.ch != pivot.ch)
        return FALSE;
    if(container.mark_indent > pivot.contents_indent)
        return FALSE;

    return TRUE;
}

/* Append a copy of '*container' to ctx.containers, growing the array
 * (16 entries at first, then doubling) when it is full.
 *
 * Returns 0 on success and -1 when the reallocation fails.
 */
static int
md_push_container(MD_CTX* ctx, const MD_CONTAINER* container)
{
    if(ctx.n_containers >= ctx.alloc_containers) {
        /* Commit the new capacity only after the reallocation succeeded, so
         * ctx.alloc_containers never claims more room than ctx.containers
         * really has. */
        typeof(ctx.alloc_containers) new_alloc =
                (ctx.alloc_containers > 0 ? ctx.alloc_containers * 2 : 16);
        MD_CONTAINER* new_containers =
                cast(MD_CONTAINER*) realloc_safe(ctx.containers, new_alloc * MD_CONTAINER.sizeof);
        if (new_containers == null) {
            ctx.MD_LOG("realloc() failed.");
            return -1;
        }

        ctx.containers = new_containers;
        ctx.alloc_containers = new_alloc;
    }

    memcpy(&ctx.containers[ctx.n_containers++], container, MD_CONTAINER.sizeof);
    return 0;
}

/* Emit opener records for the last 'n_children' containers in
 * ctx.containers.
 *
 * For list containers an OL/UL opener (carrying the container's start
 * number and 'data') and an LI opener are pushed; for block quotes a QUOTE
 * opener is pushed. Returns 0 on success or the first negative value
 * reported by a callee.
 */
static int
md_enter_child_containers(MD_CTX* ctx, int n_children, uint data)
{
    int i;
    int ret = 0;

    for(i = ctx.n_containers - n_children; i < ctx.n_containers; i++) {
        MD_CONTAINER* c = &ctx.containers[i];
        int is_ordered_list = FALSE;

        switch(c.ch) {
            case ')':
            case '.':
                is_ordered_list = TRUE;
                /* Pass through */
                goto case '-';

            case '-':
            case '+':
            case '*':
                /* Remember offset in ctx.block_bytes so we can revisit the
                 * block if we detect it is a loose list. */
                /* A failure to end the current block is propagated like any
                 * other error in this function instead of being dropped. */
                ret = (md_end_current_block(ctx));
                if (ret < 0) goto abort;
                c.block_byte_off = ctx.n_block_bytes;

                ret = (md_push_container_bytes(ctx,
                                (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL),
                                c.start, data, MD_BLOCK_CONTAINER_OPENER));
                if (ret < 0) goto abort;
                ret = (md_push_container_bytes(ctx, MD_BLOCK_LI,
                                c.task_mark_off,
                                (c.is_task ? ctx.CH(c.task_mark_off) : 0),
                                MD_BLOCK_CONTAINER_OPENER));
                if (ret < 0) goto abort;
                break;

            case '>':
                ret = (md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER));
                if (ret < 0) goto abort;
                break;

            default:
                assert(false);
        }
    }

abort:
    return ret;
}

/* Pop containers from the top of ctx.containers until only 'n_keep'
 * remain, emitting the matching closer records (LI then OL/UL for lists,
 * QUOTE for block quotes) for each of them.
 *
 * Returns 0 on success or the first negative value reported by
 * md_push_container_bytes(); in that case the failing container is left
 * on the stack.
 */
static int
md_leave_child_containers(MD_CTX* ctx, int n_keep)
{
    int ret = 0;

    while(ctx.n_containers > n_keep) {
        MD_CONTAINER* c = &ctx.containers[ctx.n_containers-1];
        int is_ordered_list = FALSE;

        switch(c.ch) {
            case ')':
            case '.':
                is_ordered_list = TRUE;
                /* Pass through */
                goto case '-';

            case '-':
            case '+':
            case '*':
                ret = (md_push_container_bytes(ctx, MD_BLOCK_LI,
                                c.task_mark_off, (c.is_task ? ctx.CH(c.task_mark_off) : 0),
                                MD_BLOCK_CONTAINER_CLOSER));
                if (ret < 0) goto abort;
                ret = (md_push_container_bytes(ctx,
                                (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), 0,
                                c.ch, MD_BLOCK_CONTAINER_CLOSER));
                if (ret < 0) goto abort;
                break;

            case '>':
                ret = (md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0,
                                0, MD_BLOCK_CONTAINER_CLOSER));
                if (ret < 0) goto abort;
                break;

            default:
                assert(false);
        }

        ctx.n_containers--;
    }

abort:
    return ret;
}

/* Recognize a container mark (block quote '>', bullet '-', '+', '*', or an
 * ordered list number followed by '.' or ')') at offset 'beg'.
 *
 * 'indent' is the indentation of the mark; marks indented by
 * ctx.code_indent_offset or more are rejected. On success the mark
 * character, indentation fields and flags of '*p_container' are filled
 * and *p_end is the offset just after the mark.
 *
 * Note p_container.start is overwritten whenever the ordered-list check
 * is reached, even if the function then returns FALSE.
 */
static int
md_is_container_mark(MD_CTX* ctx, uint indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
{
    OFF off = beg;
    OFF max_end;

    if(indent >= ctx.code_indent_offset)
        return FALSE;

    /* Check for block quote mark. */
    if(off < ctx.size && ctx.CH(off) == '>') {
        off++;
        p_container.ch = '>';
        p_container.is_loose = FALSE;
        p_container.is_task = FALSE;
        p_container.mark_indent = indent;
        p_container.contents_indent = indent + 1;
        *p_end = off;
        return TRUE;
    }

    /* Check for list item bullet mark. */
    if(off+1 < ctx.size && ctx.ISANYOF(off, "-+*") && (ctx.ISBLANK(off+1) || ctx.ISNEWLINE(off+1))) {
        p_container.ch = ctx.CH(off);
        p_container.is_loose = FALSE;
        p_container.is_task = FALSE;
        p_container.mark_indent = indent;
        p_container.contents_indent = indent + 1;
        *p_end = off + 1;
        return TRUE;
    }

    /* Check for ordered list item marks: at most nine digits ... */
    max_end = off + 9;
    if(max_end > ctx.size)
        max_end = ctx.size;
    p_container.start = 0;
    while(off < max_end && ctx.ISDIGIT(off)) {
        p_container.start = p_container.start * 10 + ctx.CH(off) - '0';
        off++;
    }
    /* ... and at least one: without 'off > beg' a bare "." or ")" followed
     * by a blank would be taken for an ordered list item numbered 0. */
    if(off > beg && off+1 < ctx.size &&
       (ctx.CH(off) == '.' || ctx.CH(off) == ')') &&
       (ctx.ISBLANK(off+1) || ctx.ISNEWLINE(off+1)))
    {
        p_container.ch = ctx.CH(off);
        p_container.is_loose = FALSE;
        p_container.is_task = FALSE;
        p_container.mark_indent = indent;
        p_container.contents_indent = indent + off - beg + 1;
        *p_end = off + 1;
        return TRUE;
    }

    return FALSE;
}

/* Measure the indentation formed by blank characters starting at 'beg'.
 *
 * 'total_indent' is the column already consumed on the line; it matters
 * because a tab advances to the next multiple of four columns. Returns
 * the number of columns added on top of 'total_indent' and stores the
 * offset of the first non-blank character into *p_end.
 */
uint md_line_indentation(MD_CTX* ctx, uint total_indent, OFF beg, OFF* p_end)
{
    OFF off = beg;
    uint indent = total_indent;

    while(off < ctx.size && ctx.ISBLANK(off)) {
        if(ctx.CH(off) == '\t')
            indent = (indent + 4) & ~3;     /* round up to the next tab stop */
        else
            indent++;
        off++;
    }

    *p_end = off;
    return indent - total_indent;
}

/* Shared stand-in used as the pivot line whenever there is no current
 * block to continue. */
static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0 };

/* Analyze type of the line and find some of its properties. This serves as a
 * main input for determining type and boundaries of a block.
*/
/* Parameters and result of md_analyze_line():
 *
 *  ctx         parser context; container stack and list-related flags in
 *              it are updated as a side effect
 *  beg         offset where the line starts
 *  p_end       receives the offset just after the line's new line sequence
 *  pivot_line  analysis of the line the current block started with
 *  line        receives type, indent, beg, end (and for some line types
 *              data) of the analyzed line
 *
 * Returns 0 on success or a negative value propagated from a callee.
 */
int md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
                    const(MD_LINE_ANALYSIS)* pivot_line, MD_LINE_ANALYSIS* line)
{
    uint total_indent = 0;
    int n_parents = 0;
    int n_brothers = 0;
    int n_children = 0;
    MD_CONTAINER container = { 0 };
    int prev_line_has_list_loosening_effect = ctx.last_line_has_list_loosening_effect;
    OFF off = beg;
    OFF hr_killer = 0;
    int ret = 0;

    line.indent = md_line_indentation(ctx, total_indent, off, &off);
    total_indent += line.indent;
    line.beg = off;

    /* Given the indentation and block quote marks '>', determine how many of
     * the current containers are our parents. */
    while(n_parents < ctx.n_containers) {
        MD_CONTAINER* c = &ctx.containers[n_parents];

        if(c.ch == '>' && line.indent < ctx.code_indent_offset &&
            off < ctx.size && ctx.CH(off) == '>')
        {
            /* Block quote mark. */
            off++;
            total_indent++;
            line.indent = md_line_indentation(ctx, total_indent, off, &off);
            total_indent += line.indent;

            /* The optional 1st space after '>' is part of the block quote mark. */
            if(line.indent > 0)
                line.indent--;

            line.beg = off;
        } else if(c.ch != '>' && line.indent >= c.contents_indent) {
            /* List. */
            line.indent -= c.contents_indent;
        } else {
            break;
        }

        n_parents++;
    }

    if(off >= ctx.size || ctx.ISNEWLINE(off)) {
        /* Blank line does not need any real indentation to be nested inside
         * a list. */
        if(n_brothers + n_children == 0) {
            while(n_parents < ctx.n_containers && ctx.containers[n_parents].ch != '>')
                n_parents++;
        }
    }

    while(TRUE) {
        /* Check whether we are fenced code continuation. */
        if(pivot_line.type == MD_LINE_FENCEDCODE) {
            line.beg = off;

            /* We are another MD_LINE_FENCEDCODE unless we are closing fence
             * which we transform into MD_LINE_BLANK. */
            if(line.indent < ctx.code_indent_offset) {
                if(md_is_closing_code_fence(ctx, ctx.CH(pivot_line.beg), off, &off)) {
                    line.type = MD_LINE_BLANK;
                    ctx.last_line_has_list_loosening_effect = FALSE;
                    break;
                }
            }

            /* Change indentation accordingly to the initial code fence. */
            if(n_parents == ctx.n_containers) {
                if(line.indent > pivot_line.indent)
                    line.indent -= pivot_line.indent;
                else
                    line.indent = 0;

                line.type = MD_LINE_FENCEDCODE;
                break;
            }
        }

        /* Check whether we are HTML block continuation. */
        if(pivot_line.type == MD_LINE_HTML && ctx.html_block_type > 0) {
            int html_block_type;

            html_block_type = md_is_html_block_end_condition(ctx, off, &off);
            if(html_block_type > 0) {
                assert(html_block_type == ctx.html_block_type);

                /* Make sure this is the last line of the block. */
                ctx.html_block_type = 0;

                /* Some end conditions serve as blank lines at the same time. */
                if(html_block_type == 6 || html_block_type == 7) {
                    line.type = MD_LINE_BLANK;
                    line.indent = 0;
                    break;
                }
            }

            if(n_parents == ctx.n_containers) {
                line.type = MD_LINE_HTML;
                break;
            }
        }

        /* Check for blank line. */
        if(off >= ctx.size || ctx.ISNEWLINE(off)) {
            if(pivot_line.type == MD_LINE_INDENTEDCODE && n_parents == ctx.n_containers) {
                line.type = MD_LINE_INDENTEDCODE;
                if(line.indent > ctx.code_indent_offset)
                    line.indent -= ctx.code_indent_offset;
                else
                    line.indent = 0;
                ctx.last_line_has_list_loosening_effect = FALSE;
            } else {
                line.type = MD_LINE_BLANK;
                ctx.last_line_has_list_loosening_effect = (n_parents > 0 &&
                        n_brothers + n_children == 0 &&
                        ctx.containers[n_parents-1].ch != '>');

                /* See https://github.com/mity/md4c/issues/6
                 *
                 * This ugly checking tests we are in (yet empty) list item but not
                 * its very first line (with the list item mark).
                 *
                 * If we are such blank line, then any following non-blank line
                 * which would be part of this list item actually ends the list
                 * because "a list item can begin with at most one blank line."
                 */
                if(n_parents > 0 && ctx.containers[n_parents-1].ch != '>' &&
                   n_brothers + n_children == 0 && ctx.current_block == null &&
                   ctx.n_block_bytes > cast(int) MD_BLOCK.sizeof)
                {
                    MD_BLOCK* top_block = cast(MD_BLOCK*) (cast(char*)ctx.block_bytes + ctx.n_block_bytes - MD_BLOCK.sizeof);
                    if(top_block.type == MD_BLOCK_LI)
                        ctx.last_list_item_starts_with_two_blank_lines = TRUE;
                }
            }
            break;
        } else {
            /* This is 2nd half of the hack. If the flag is set (that is there
             * were 2nd blank line at the start of the list item) and we would also
             * belonging to such list item, then interrupt the list. */
            ctx.last_line_has_list_loosening_effect = FALSE;
            if(ctx.last_list_item_starts_with_two_blank_lines) {
                if(n_parents > 0 && ctx.containers[n_parents-1].ch != '>' &&
                   n_brothers + n_children == 0 && ctx.current_block == null &&
                   ctx.n_block_bytes > cast(int) MD_BLOCK.sizeof)
                {
                    MD_BLOCK* top_block = cast(MD_BLOCK*) (cast(char*)ctx.block_bytes + ctx.n_block_bytes - MD_BLOCK.sizeof);
                    if(top_block.type == MD_BLOCK_LI)
                        n_parents--;
                }

                ctx.last_list_item_starts_with_two_blank_lines = FALSE;
            }
        }

        /* Check whether we are Setext underline. */
        if(line.indent < ctx.code_indent_offset && pivot_line.type == MD_LINE_TEXT
            && (ctx.CH(off) == '=' || ctx.CH(off) == '-')
            && (n_parents == ctx.n_containers))
        {
            uint level;

            if(md_is_setext_underline(ctx, off, &off, &level)) {
                line.type = MD_LINE_SETEXTUNDERLINE;
                line.data = level;
                break;
            }
        }

        /* Check for thematic break line. */
        if(line.indent < ctx.code_indent_offset && ctx.ISANYOF(off, "-_*") && off >= hr_killer) {
            if(md_is_hr_line(ctx, off, &off, &hr_killer)) {
                line.type = MD_LINE_HR;
                break;
            }
        }

        /* Check for "brother" container. I.e. whether we are another list item
         * in already started list. */
        if(n_parents < ctx.n_containers && n_brothers + n_children == 0) {
            OFF tmp;

            if(md_is_container_mark(ctx, line.indent, off, &tmp, &container) &&
               md_is_container_compatible(&ctx.containers[n_parents], &container))
            {
                pivot_line = &md_dummy_blank_line;

                off = tmp;

                total_indent += container.contents_indent - container.mark_indent;
                line.indent = md_line_indentation(ctx, total_indent, off, &off);
                total_indent += line.indent;
                line.beg = off;

                /* Some of the following whitespace actually still belongs to the mark. */
                if(off >= ctx.size || ctx.ISNEWLINE(off)) {
                    container.contents_indent++;
                } else if(line.indent <= ctx.code_indent_offset) {
                    container.contents_indent += line.indent;
                    line.indent = 0;
                } else {
                    container.contents_indent += 1;
                    line.indent--;
                }

                ctx.containers[n_parents].mark_indent = container.mark_indent;
                ctx.containers[n_parents].contents_indent = container.contents_indent;

                n_brothers++;
                continue;
            }
        }

        /* Check for indented code.
         * Note indented code block cannot interrupt a paragraph. */
        if(line.indent >= ctx.code_indent_offset &&
            (pivot_line.type == MD_LINE_BLANK || pivot_line.type == MD_LINE_INDENTEDCODE))
        {
            line.type = MD_LINE_INDENTEDCODE;
            assert(line.indent >= ctx.code_indent_offset);
            line.indent -= ctx.code_indent_offset;
            line.data = 0;
            break;
        }

        /* Check for start of a new container block. */
        if(line.indent < ctx.code_indent_offset &&
           md_is_container_mark(ctx, line.indent, off, &off, &container))
        {
            if(pivot_line.type == MD_LINE_TEXT && n_parents == ctx.n_containers &&
                        (off >= ctx.size || ctx.ISNEWLINE(off)) && container.ch != '>')
            {
                /* Noop. List mark followed by a blank line cannot interrupt a paragraph. */
            } else if(pivot_line.type == MD_LINE_TEXT && n_parents == ctx.n_containers &&
                        (container.ch == '.' || container.ch == ')') && container.start != 1)
            {
                /* Noop. Ordered list cannot interrupt a paragraph unless the start index is 1. */
            } else {
                total_indent += container.contents_indent - container.mark_indent;
                line.indent = md_line_indentation(ctx, total_indent, off, &off);
                total_indent += line.indent;

                line.beg = off;
                line.data = container.ch;

                /* Some of the following whitespace actually still belongs to the mark. */
                if(off >= ctx.size || ctx.ISNEWLINE(off)) {
                    container.contents_indent++;
                } else if(line.indent <= ctx.code_indent_offset) {
                    container.contents_indent += line.indent;
                    line.indent = 0;
                } else {
                    container.contents_indent += 1;
                    line.indent--;
                }

                if(n_brothers + n_children == 0)
                    pivot_line = &md_dummy_blank_line;

                if(n_children == 0)
                {
                    ret = (md_leave_child_containers(ctx, n_parents + n_brothers));
                    if (ret < 0) goto abort;
                }

                n_children++;
                ret = (md_push_container(ctx, &container));
                if (ret < 0) goto abort;
                continue;
            }
        }

        /* Check whether we are table continuation. */
        if(pivot_line.type == MD_LINE_TABLE && md_is_table_row(ctx, off, &off) &&
           n_parents == ctx.n_containers)
        {
            line.type = MD_LINE_TABLE;
            break;
        }

        /* Check for ATX header. */
        if(line.indent < ctx.code_indent_offset && ctx.CH(off) == '#') {
            uint level;

            if(md_is_atxheader_line(ctx, off, &line.beg, &off, &level)) {
                line.type = MD_LINE_ATXHEADER;
                line.data = level;
                break;
            }
        }

        /* Check whether we are starting code fence. */
        if(ctx.CH(off) == '`' || ctx.CH(off) == '~') {
            if(md_is_opening_code_fence(ctx, off, &off)) {
                line.type = MD_LINE_FENCEDCODE;
                line.data = 1;
                break;
            }
        }

        /* Check for start of raw HTML block. */
        if(ctx.CH(off) == '<' && !(ctx.parser.flags & MD_FLAG_NOHTMLBLOCKS))
        {
            ctx.html_block_type = md_is_html_block_start_condition(ctx, off);

            /* HTML block type 7 cannot interrupt paragraph. */
            if(ctx.html_block_type == 7 && pivot_line.type == MD_LINE_TEXT)
                ctx.html_block_type = 0;

            if(ctx.html_block_type > 0) {
                /* The line itself also may immediately close the block. */
                if(md_is_html_block_end_condition(ctx, off, &off) == ctx.html_block_type) {
                    /* Make sure this is the last line of the block. */
                    ctx.html_block_type = 0;
                }

                line.type = MD_LINE_HTML;
                break;
            }
        }

        /* Check for table underline. */
        if((ctx.parser.flags & MD_FLAG_TABLES) && pivot_line.type == MD_LINE_TEXT &&
           (ctx.CH(off) == '|' || ctx.CH(off) == '-' || ctx.CH(off) == ':') &&
           n_parents == ctx.n_containers)
        {
            uint col_count;

            if(ctx.current_block != null && ctx.current_block.n_lines == 1 &&
                md_is_table_underline(ctx, off, &off, &col_count) &&
                md_is_table_row(ctx, pivot_line.beg, null))
            {
                line.data = col_count;
                line.type = MD_LINE_TABLEUNDERLINE;
                break;
            }
        }

        /* By default, we are normal text line. */
        line.type = MD_LINE_TEXT;
        if(pivot_line.type == MD_LINE_TEXT && n_brothers + n_children == 0) {
            /* Lazy continuation. */
            n_parents = ctx.n_containers;
        }

        /* Check for task mark. */
        if((ctx.parser.flags & MD_FLAG_TASKLISTS) && n_brothers + n_children > 0 &&
           ISANYOF_(ctx.containers[ctx.n_containers-1].ch, "-+*.)"))
        {
            OFF tmp = off;

            while(tmp < ctx.size && tmp < off + 3 && ctx.ISBLANK(tmp))
                tmp++;
            if(tmp + 2 < ctx.size && ctx.CH(tmp) == '[' &&
               ctx.ISANYOF(tmp+1, "xX ") && ctx.CH(tmp+2) == ']' &&
               (tmp + 3 == ctx.size || ctx.ISBLANK(tmp+3) || ctx.ISNEWLINE(tmp+3)))
            {
                MD_CONTAINER* task_container = (n_children > 0 ? &ctx.containers[ctx.n_containers-1] : &container);
                task_container.is_task = TRUE;
                task_container.task_mark_off = tmp + 1;
                off = tmp + 3;
                /* The condition above accepts tmp + 3 == ctx.size, so 'off'
                 * may already sit at the end of the input here; the scan has
                 * to be bounded like every other scan in this function. */
                while(off < ctx.size && ctx.ISWHITESPACE(off))
                    off++;
                line.beg = off;
            }
        }

        break;
    }

    /* Scan for end of the line.
     *
     * Note this is quite a bottleneck of the parsing as we here iterate almost
     * over complete document.
     */
    {
        /* Optimization: Use some loop unrolling. */
        while(off + 3 < ctx.size && !ctx.ISNEWLINE(off+0) && !ctx.ISNEWLINE(off+1)
                                 && !ctx.ISNEWLINE(off+2) && !ctx.ISNEWLINE(off+3))
            off += 4;
        while(off < ctx.size && !ctx.ISNEWLINE(off))
            off++;
    }

    /* Set end of the line. */
    line.end = off;

    /* But for ATX header, we should exclude the optional trailing mark. */
    if(line.type == MD_LINE_ATXHEADER) {
        OFF tmp = line.end;
        while(tmp > line.beg && ctx.CH(tmp-1) == ' ')
            tmp--;
        while(tmp > line.beg && ctx.CH(tmp-1) == '#')
            tmp--;
        if(tmp == line.beg || ctx.CH(tmp-1) == ' ' || (ctx.parser.flags & MD_FLAG_PERMISSIVEATXHEADERS))
            line.end = tmp;
    }

    /* Trim trailing spaces. */
    if(line.type != MD_LINE_INDENTEDCODE && line.type != MD_LINE_FENCEDCODE) {
        while(line.end > line.beg && ctx.CH(line.end-1) == ' ')
            line.end--;
    }

    /* Eat also the new line. */
    if(off < ctx.size && ctx.CH(off) == '\r')
        off++;
    if(off < ctx.size && ctx.CH(off) == '\n')
        off++;

    *p_end = off;

    /* If we belong to a list after seeing a blank line, the list is loose. */
    if(prev_line_has_list_loosening_effect && line.type != MD_LINE_BLANK && n_parents + n_brothers > 0) {
        MD_CONTAINER* c = &ctx.containers[n_parents + n_brothers - 1];
        if(c.ch != '>') {
            MD_BLOCK* block = cast(MD_BLOCK*) ((cast(char*)ctx.block_bytes) + c.block_byte_off);
            block.flags = block.flags | MD_BLOCK_LOOSE_LIST;
        }
    }

    /* Leave any containers we are not part of anymore. */
    if(n_children == 0 && n_parents + n_brothers < ctx.n_containers)
    {
        ret = (md_leave_child_containers(ctx, n_parents + n_brothers));
        if (ret < 0) goto abort;
    }

    /* Enter any container we found a mark for. */
    if(n_brothers > 0) {
        assert(n_brothers == 1);
        /* A brother item closes the previous list item and opens a new one
         * inside the same list. */
        ret = (md_push_container_bytes(ctx, MD_BLOCK_LI,
                    ctx.containers[n_parents].task_mark_off,
                    (ctx.containers[n_parents].is_task ? ctx.CH(ctx.containers[n_parents].task_mark_off) : 0),
                    MD_BLOCK_CONTAINER_CLOSER));
        if (ret < 0) goto abort;
        ret = (md_push_container_bytes(ctx, MD_BLOCK_LI,
                    container.task_mark_off,
                    (container.is_task ? ctx.CH(container.task_mark_off) : 0),
                    MD_BLOCK_CONTAINER_OPENER));
        if (ret < 0) goto abort;
        ctx.containers[n_parents].is_task = container.is_task;
        ctx.containers[n_parents].task_mark_off = container.task_mark_off;
    }

    if(n_children > 0)
    {
        ret = (md_enter_child_containers(ctx, n_children, line.data));
        if (ret < 0) goto abort;
    }

abort:
    return ret;
}

/* Feed one analyzed line into the block structure.
 *
 * Depending on line.type the current block is ended, converted (Setext
 * header, table), started, or simply extended with the line.
 * '*p_pivot_line' is updated to the line that subsequent lines have to be
 * compared against. Returns 0 on success or a negative value propagated
 * from a callee.
 */
int md_process_line(MD_CTX* ctx, const(MD_LINE_ANALYSIS)** p_pivot_line, MD_LINE_ANALYSIS* line)
{
    const(MD_LINE_ANALYSIS)* pivot_line = *p_pivot_line;
    int ret = 0;

    /* Blank line ends current leaf block. */
    if(line.type == MD_LINE_BLANK) {
        ret = (md_end_current_block(ctx));
        if (ret < 0) goto abort;
        *p_pivot_line = &md_dummy_blank_line;
        return 0;
    }

    /* Some line types form block on their own. */
    if(line.type == MD_LINE_HR || line.type == MD_LINE_ATXHEADER) {
        ret = (md_end_current_block(ctx));
        if (ret < 0) goto abort;

        /* Add our single-line block. */
        ret = (md_start_new_block(ctx, line));
        if (ret < 0) goto abort;
        ret = (md_add_line_into_current_block(ctx, line));
        if (ret < 0) goto abort;
        ret = (md_end_current_block(ctx));
        if (ret < 0) goto abort;
        *p_pivot_line = &md_dummy_blank_line;
        return 0;
    }

    /* MD_LINE_SETEXTUNDERLINE changes meaning of the current block and ends it. */
    if(line.type == MD_LINE_SETEXTUNDERLINE) {
        assert(ctx.current_block != null);
        ctx.current_block.type = MD_BLOCK_H;
        ctx.current_block.data = line.data;
        ctx.current_block.flags = ctx.current_block.flags | MD_BLOCK_SETEXT_HEADER;
        ret = (md_add_line_into_current_block(ctx, line));
        if (ret < 0) goto abort;
        ret = (md_end_current_block(ctx));
        if (ret < 0) goto abort;
        if(ctx.current_block == null) {
            *p_pivot_line = &md_dummy_blank_line;
        } else {
            /* This happens if we have consumed all the body as link ref. defs.
             * and downgraded the underline into start of a new paragraph block. */
            line.type = MD_LINE_TEXT;
            *p_pivot_line = line;
        }
        return 0;
    }

    /* MD_LINE_TABLEUNDERLINE changes meaning of the current block. */
    if(line.type == MD_LINE_TABLEUNDERLINE) {
        assert(ctx.current_block != null);
        assert(ctx.current_block.n_lines == 1);
        ctx.current_block.type = MD_BLOCK_TABLE;
        ctx.current_block.data = line.data;
        assert(pivot_line != &md_dummy_blank_line);
        /* The pivot is retyped in place; the const qualifier is cast away,
         * which the assert above keeps off the shared dummy line. */
        (cast(MD_LINE_ANALYSIS*)pivot_line).type = MD_LINE_TABLE;
        ret = (md_add_line_into_current_block(ctx, line));
        if (ret < 0) goto abort;
        return 0;
    }

    /* The current block also ends if the line has different type. */
    if(line.type != pivot_line.type)
    {
        ret = (md_end_current_block(ctx));
        if (ret < 0) goto abort;
    }

    /* The current line may start a new block. */
    if(ctx.current_block == null) {
        ret = (md_start_new_block(ctx, line));
        if (ret < 0) goto abort;
        *p_pivot_line = line;
    }

    /* In all other cases the line is just a continuation of the current block. */
    ret = (md_add_line_into_current_block(ctx, line));
    if (ret < 0) goto abort;

abort:
    return ret;
}

/* Process the whole document held by 'ctx'.
 *
 * Enters the document block, analyzes and processes the input line by
 * line, then builds the reference definition hashtable, closes all open
 * containers, processes the collected blocks and leaves the document
 * block. Returns 0 on success, otherwise the first failing step's value.
 */
int md_process_doc(MD_CTX *ctx)
{
    const(MD_LINE_ANALYSIS)* pivot_line = &md_dummy_blank_line;
    MD_LINE_ANALYSIS[2] line_buf;
    MD_LINE_ANALYSIS* line = &line_buf[0];
    OFF off = 0;
    int ret = 0;

    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_DOC, null);
    if (ret != 0) goto abort;

    while(off < ctx.size) {
        /* The pivot may point into line_buf; analyze into the other slot so
         * the pivot is not overwritten. */
        if(line == pivot_line)
            line = (line == &line_buf[0] ? &line_buf[1] : &line_buf[0]);

        ret = (md_analyze_line(ctx, off, &off, pivot_line, line));
        if (ret < 0) goto abort;
        ret = (md_process_line(ctx, &pivot_line, line));
        if (ret < 0) goto abort;
    }

    /* Ending the last block can fail like anywhere else; do not drop it. */
    ret = (md_end_current_block(ctx));
    if (ret < 0) goto abort;

    ret = (md_build_ref_def_hashtable(ctx));
    if (ret < 0) goto abort;

    /* Process all blocks. */
    ret = (md_leave_child_containers(ctx, 0));
    if (ret < 0) goto abort;
    ret = (md_process_all_blocks(ctx));
    if (ret < 0) goto abort;

    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_DOC, null);
    if (ret != 0) goto abort;

abort:

    debug(bench)
    /* Output some memory consumption statistics. */
    {
        char[256] buffer;
        sprintf(buffer, "Alloced %u bytes for block buffer.",
                    cast(uint)(ctx.alloc_block_bytes));
        ctx.MD_LOG(buffer);

        sprintf(buffer, "Alloced %u bytes for containers buffer.",
                    cast(uint)(ctx.alloc_containers * MD_CONTAINER.sizeof));
        ctx.MD_LOG(buffer);

        sprintf(buffer, "Alloced %u bytes for marks buffer.",
                    cast(uint)(ctx.alloc_marks * MD_MARK.sizeof));
        ctx.MD_LOG(buffer);

        sprintf(buffer, "Alloced %u bytes for aux. buffer.",
                    cast(uint)(ctx.alloc_buffer * MD_CHAR.sizeof));
        ctx.MD_LOG(buffer);
    }

    return ret;
}


/********************
 ***  Public API  ***
 ********************/

/**
 * Parse the Markdown document stored in the string 'text' of size 'size'.
 * The renderer provides callbacks to be called during the parsing so the
 * caller can render the document on the screen or convert the Markdown
 * to another format.
*
 * Zero is returned on success. If a runtime error occurs (e.g. a memory
 * allocation fails), -1 is returned. If the processing is aborted due to
 * any callback returning non-zero, the return value of the callback is
 * returned.
 */
int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata)
{
    /* Only ABI version 0 is understood; report anything else through the
     * parser's debug_log callback (when there is one) and give up. */
    if(parser.abi_version != 0) {
        if(parser.debug_log != null)
            parser.debug_log("Unsupported abi_version.", userdata);
        return -1;
    }

    /* Start from an all-zero context and fill in the inputs. */
    MD_CTX ctx;
    memset(&ctx, 0, MD_CTX.sizeof);
    ctx.text = text;
    ctx.size = size;
    memcpy(&ctx.parser, parser, MD_PARSER.sizeof);
    ctx.userdata = userdata;

    /* With indented code blocks disabled the threshold becomes the largest
     * OFF value, so no indentation ever reaches it. */
    ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? cast(OFF)(-1) : 4;
    md_build_mark_char_map(&ctx);
    ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1]));

    /* Every chain of unresolved opener marks starts out empty (-1). */
    foreach(ref chain; ctx.mark_chains) {
        chain.head = -1;
        chain.tail = -1;
    }
    ctx.unresolved_link_head = -1;
    ctx.unresolved_link_tail = -1;

    /* Parse the document. */
    const int result = md_process_doc(&ctx);

    /* Release everything the context accumulated, success or not. */
    md_free_ref_defs(&ctx);
    md_free_ref_def_hashtable(&ctx);
    free(ctx.buffer);
    free(ctx.marks);
    free(ctx.block_bytes);
    free(ctx.containers);

    return result;
}

//
// HTML ENTITIES
//

/* Most entities are formed by single Unicode codepoint, few by two codepoints.
 * Single-codepoint entities have codepoints[1] set to zero.
*/ +struct entity +{ + const(char)* name; + uint[2] codepoints; +} + +/* The table is generated from https://html.spec.whatwg.org/entities.json */ +static immutable entity[] entity_table = +[ + entity( "Æ", [ 198, 0 ] ), + entity( "&", [ 38, 0 ] ), + entity( "Á", [ 193, 0 ] ), + entity( "Ă", [ 258, 0 ] ), + entity( "Â", [ 194, 0 ] ), + entity( "А", [ 1040, 0 ] ), + entity( "𝔄", [ 120068, 0 ] ), + entity( "À", [ 192, 0 ] ), + entity( "Α", [ 913, 0 ] ), + entity( "Ā", [ 256, 0 ] ), + entity( "⩓", [ 10835, 0 ] ), + entity( "Ą", [ 260, 0 ] ), + entity( "𝔸", [ 120120, 0 ] ), + entity( "⁡", [ 8289, 0 ] ), + entity( "Å", [ 197, 0 ] ), + entity( "𝒜", [ 119964, 0 ] ), + entity( "≔", [ 8788, 0 ] ), + entity( "Ã", [ 195, 0 ] ), + entity( "Ä", [ 196, 0 ] ), + entity( "∖", [ 8726, 0 ] ), + entity( "⫧", [ 10983, 0 ] ), + entity( "⌆", [ 8966, 0 ] ), + entity( "Б", [ 1041, 0 ] ), + entity( "∵", [ 8757, 0 ] ), + entity( "ℬ", [ 8492, 0 ] ), + entity( "Β", [ 914, 0 ] ), + entity( "𝔅", [ 120069, 0 ] ), + entity( "𝔹", [ 120121, 0 ] ), + entity( "˘", [ 728, 0 ] ), + entity( "ℬ", [ 8492, 0 ] ), + entity( "≎", [ 8782, 0 ] ), + entity( "Ч", [ 1063, 0 ] ), + entity( "©", [ 169, 0 ] ), + entity( "Ć", [ 262, 0 ] ), + entity( "⋒", [ 8914, 0 ] ), + entity( "ⅅ", [ 8517, 0 ] ), + entity( "ℭ", [ 8493, 0 ] ), + entity( "Č", [ 268, 0 ] ), + entity( "Ç", [ 199, 0 ] ), + entity( "Ĉ", [ 264, 0 ] ), + entity( "∰", [ 8752, 0 ] ), + entity( "Ċ", [ 266, 0 ] ), + entity( "¸", [ 184, 0 ] ), + entity( "·", [ 183, 0 ] ), + entity( "ℭ", [ 8493, 0 ] ), + entity( "Χ", [ 935, 0 ] ), + entity( "⊙", [ 8857, 0 ] ), + entity( "⊖", [ 8854, 0 ] ), + entity( "⊕", [ 8853, 0 ] ), + entity( "⊗", [ 8855, 0 ] ), + entity( "∲", [ 8754, 0 ] ), + entity( "”", [ 8221, 0 ] ), + entity( "’", [ 8217, 0 ] ), + entity( "∷", [ 8759, 0 ] ), + entity( "⩴", [ 10868, 0 ] ), + entity( "≡", [ 8801, 0 ] ), + entity( "∯", [ 8751, 0 ] ), + entity( "∮", [ 8750, 0 ] ), + entity( "ℂ", [ 8450, 0 ] ), + entity( "∐", [ 8720, 0 ] ), + entity( "∳", [ 
8755, 0 ] ), + entity( "⨯", [ 10799, 0 ] ), + entity( "𝒞", [ 119966, 0 ] ), + entity( "⋓", [ 8915, 0 ] ), + entity( "≍", [ 8781, 0 ] ), + entity( "ⅅ", [ 8517, 0 ] ), + entity( "⤑", [ 10513, 0 ] ), + entity( "Ђ", [ 1026, 0 ] ), + entity( "Ѕ", [ 1029, 0 ] ), + entity( "Џ", [ 1039, 0 ] ), + entity( "‡", [ 8225, 0 ] ), + entity( "↡", [ 8609, 0 ] ), + entity( "⫤", [ 10980, 0 ] ), + entity( "Ď", [ 270, 0 ] ), + entity( "Д", [ 1044, 0 ] ), + entity( "∇", [ 8711, 0 ] ), + entity( "Δ", [ 916, 0 ] ), + entity( "𝔇", [ 120071, 0 ] ), + entity( "´", [ 180, 0 ] ), + entity( "˙", [ 729, 0 ] ), + entity( "˝", [ 733, 0 ] ), + entity( "`", [ 96, 0 ] ), + entity( "˜", [ 732, 0 ] ), + entity( "⋄", [ 8900, 0 ] ), + entity( "ⅆ", [ 8518, 0 ] ), + entity( "𝔻", [ 120123, 0 ] ), + entity( "¨", [ 168, 0 ] ), + entity( "⃜", [ 8412, 0 ] ), + entity( "≐", [ 8784, 0 ] ), + entity( "∯", [ 8751, 0 ] ), + entity( "¨", [ 168, 0 ] ), + entity( "⇓", [ 8659, 0 ] ), + entity( "⇐", [ 8656, 0 ] ), + entity( "⇔", [ 8660, 0 ] ), + entity( "⫤", [ 10980, 0 ] ), + entity( "⟸", [ 10232, 0 ] ), + entity( "⟺", [ 10234, 0 ] ), + entity( "⟹", [ 10233, 0 ] ), + entity( "⇒", [ 8658, 0 ] ), + entity( "⊨", [ 8872, 0 ] ), + entity( "⇑", [ 8657, 0 ] ), + entity( "⇕", [ 8661, 0 ] ), + entity( "∥", [ 8741, 0 ] ), + entity( "↓", [ 8595, 0 ] ), + entity( "⤓", [ 10515, 0 ] ), + entity( "⇵", [ 8693, 0 ] ), + entity( "̑", [ 785, 0 ] ), + entity( "⥐", [ 10576, 0 ] ), + entity( "⥞", [ 10590, 0 ] ), + entity( "↽", [ 8637, 0 ] ), + entity( "⥖", [ 10582, 0 ] ), + entity( "⥟", [ 10591, 0 ] ), + entity( "⇁", [ 8641, 0 ] ), + entity( "⥗", [ 10583, 0 ] ), + entity( "⊤", [ 8868, 0 ] ), + entity( "↧", [ 8615, 0 ] ), + entity( "⇓", [ 8659, 0 ] ), + entity( "𝒟", [ 119967, 0 ] ), + entity( "Đ", [ 272, 0 ] ), + entity( "Ŋ", [ 330, 0 ] ), + entity( "Ð", [ 208, 0 ] ), + entity( "É", [ 201, 0 ] ), + entity( "Ě", [ 282, 0 ] ), + entity( "Ê", [ 202, 0 ] ), + entity( "Э", [ 1069, 0 ] ), + entity( "Ė", [ 278, 0 ] ), + entity( "𝔈", [ 120072, 0 ] ), + 
entity( "È", [ 200, 0 ] ), + entity( "∈", [ 8712, 0 ] ), + entity( "Ē", [ 274, 0 ] ), + entity( "◻", [ 9723, 0 ] ), + entity( "▫", [ 9643, 0 ] ), + entity( "Ę", [ 280, 0 ] ), + entity( "𝔼", [ 120124, 0 ] ), + entity( "Ε", [ 917, 0 ] ), + entity( "⩵", [ 10869, 0 ] ), + entity( "≂", [ 8770, 0 ] ), + entity( "⇌", [ 8652, 0 ] ), + entity( "ℰ", [ 8496, 0 ] ), + entity( "⩳", [ 10867, 0 ] ), + entity( "Η", [ 919, 0 ] ), + entity( "Ë", [ 203, 0 ] ), + entity( "∃", [ 8707, 0 ] ), + entity( "ⅇ", [ 8519, 0 ] ), + entity( "Ф", [ 1060, 0 ] ), + entity( "𝔉", [ 120073, 0 ] ), + entity( "◼", [ 9724, 0 ] ), + entity( "▪", [ 9642, 0 ] ), + entity( "𝔽", [ 120125, 0 ] ), + entity( "∀", [ 8704, 0 ] ), + entity( "ℱ", [ 8497, 0 ] ), + entity( "ℱ", [ 8497, 0 ] ), + entity( "Ѓ", [ 1027, 0 ] ), + entity( ">", [ 62, 0 ] ), + entity( "Γ", [ 915, 0 ] ), + entity( "Ϝ", [ 988, 0 ] ), + entity( "Ğ", [ 286, 0 ] ), + entity( "Ģ", [ 290, 0 ] ), + entity( "Ĝ", [ 284, 0 ] ), + entity( "Г", [ 1043, 0 ] ), + entity( "Ġ", [ 288, 0 ] ), + entity( "𝔊", [ 120074, 0 ] ), + entity( "⋙", [ 8921, 0 ] ), + entity( "𝔾", [ 120126, 0 ] ), + entity( "≥", [ 8805, 0 ] ), + entity( "⋛", [ 8923, 0 ] ), + entity( "≧", [ 8807, 0 ] ), + entity( "⪢", [ 10914, 0 ] ), + entity( "≷", [ 8823, 0 ] ), + entity( "⩾", [ 10878, 0 ] ), + entity( "≳", [ 8819, 0 ] ), + entity( "𝒢", [ 119970, 0 ] ), + entity( "≫", [ 8811, 0 ] ), + entity( "Ъ", [ 1066, 0 ] ), + entity( "ˇ", [ 711, 0 ] ), + entity( "^", [ 94, 0 ] ), + entity( "Ĥ", [ 292, 0 ] ), + entity( "ℌ", [ 8460, 0 ] ), + entity( "ℋ", [ 8459, 0 ] ), + entity( "ℍ", [ 8461, 0 ] ), + entity( "─", [ 9472, 0 ] ), + entity( "ℋ", [ 8459, 0 ] ), + entity( "Ħ", [ 294, 0 ] ), + entity( "≎", [ 8782, 0 ] ), + entity( "≏", [ 8783, 0 ] ), + entity( "Е", [ 1045, 0 ] ), + entity( "IJ", [ 306, 0 ] ), + entity( "Ё", [ 1025, 0 ] ), + entity( "Í", [ 205, 0 ] ), + entity( "Î", [ 206, 0 ] ), + entity( "И", [ 1048, 0 ] ), + entity( "İ", [ 304, 0 ] ), + entity( "ℑ", [ 8465, 0 ] ), + entity( "Ì", [ 204, 0 ] 
), + entity( "ℑ", [ 8465, 0 ] ), + entity( "Ī", [ 298, 0 ] ), + entity( "ⅈ", [ 8520, 0 ] ), + entity( "⇒", [ 8658, 0 ] ), + entity( "∬", [ 8748, 0 ] ), + entity( "∫", [ 8747, 0 ] ), + entity( "⋂", [ 8898, 0 ] ), + entity( "⁣", [ 8291, 0 ] ), + entity( "⁢", [ 8290, 0 ] ), + entity( "Į", [ 302, 0 ] ), + entity( "𝕀", [ 120128, 0 ] ), + entity( "Ι", [ 921, 0 ] ), + entity( "ℐ", [ 8464, 0 ] ), + entity( "Ĩ", [ 296, 0 ] ), + entity( "І", [ 1030, 0 ] ), + entity( "Ï", [ 207, 0 ] ), + entity( "Ĵ", [ 308, 0 ] ), + entity( "Й", [ 1049, 0 ] ), + entity( "𝔍", [ 120077, 0 ] ), + entity( "𝕁", [ 120129, 0 ] ), + entity( "𝒥", [ 119973, 0 ] ), + entity( "Ј", [ 1032, 0 ] ), + entity( "Є", [ 1028, 0 ] ), + entity( "Х", [ 1061, 0 ] ), + entity( "Ќ", [ 1036, 0 ] ), + entity( "Κ", [ 922, 0 ] ), + entity( "Ķ", [ 310, 0 ] ), + entity( "К", [ 1050, 0 ] ), + entity( "𝔎", [ 120078, 0 ] ), + entity( "𝕂", [ 120130, 0 ] ), + entity( "𝒦", [ 119974, 0 ] ), + entity( "Љ", [ 1033, 0 ] ), + entity( "<", [ 60, 0 ] ), + entity( "Ĺ", [ 313, 0 ] ), + entity( "Λ", [ 923, 0 ] ), + entity( "⟪", [ 10218, 0 ] ), + entity( "ℒ", [ 8466, 0 ] ), + entity( "↞", [ 8606, 0 ] ), + entity( "Ľ", [ 317, 0 ] ), + entity( "Ļ", [ 315, 0 ] ), + entity( "Л", [ 1051, 0 ] ), + entity( "⟨", [ 10216, 0 ] ), + entity( "←", [ 8592, 0 ] ), + entity( "⇤", [ 8676, 0 ] ), + entity( "⇆", [ 8646, 0 ] ), + entity( "⌈", [ 8968, 0 ] ), + entity( "⟦", [ 10214, 0 ] ), + entity( "⥡", [ 10593, 0 ] ), + entity( "⇃", [ 8643, 0 ] ), + entity( "⥙", [ 10585, 0 ] ), + entity( "⌊", [ 8970, 0 ] ), + entity( "↔", [ 8596, 0 ] ), + entity( "⥎", [ 10574, 0 ] ), + entity( "⊣", [ 8867, 0 ] ), + entity( "↤", [ 8612, 0 ] ), + entity( "⥚", [ 10586, 0 ] ), + entity( "⊲", [ 8882, 0 ] ), + entity( "⧏", [ 10703, 0 ] ), + entity( "⊴", [ 8884, 0 ] ), + entity( "⥑", [ 10577, 0 ] ), + entity( "⥠", [ 10592, 0 ] ), + entity( "↿", [ 8639, 0 ] ), + entity( "⥘", [ 10584, 0 ] ), + entity( "↼", [ 8636, 0 ] ), + entity( "⥒", [ 10578, 0 ] ), + entity( "⇐", [ 8656, 0 ] ), + 
entity( "⇔", [ 8660, 0 ] ), + entity( "⋚", [ 8922, 0 ] ), + entity( "≦", [ 8806, 0 ] ), + entity( "≶", [ 8822, 0 ] ), + entity( "⪡", [ 10913, 0 ] ), + entity( "⩽", [ 10877, 0 ] ), + entity( "≲", [ 8818, 0 ] ), + entity( "𝔏", [ 120079, 0 ] ), + entity( "⋘", [ 8920, 0 ] ), + entity( "⇚", [ 8666, 0 ] ), + entity( "Ŀ", [ 319, 0 ] ), + entity( "⟵", [ 10229, 0 ] ), + entity( "⟷", [ 10231, 0 ] ), + entity( "⟶", [ 10230, 0 ] ), + entity( "⟸", [ 10232, 0 ] ), + entity( "⟺", [ 10234, 0 ] ), + entity( "⟹", [ 10233, 0 ] ), + entity( "𝕃", [ 120131, 0 ] ), + entity( "↙", [ 8601, 0 ] ), + entity( "↘", [ 8600, 0 ] ), + entity( "ℒ", [ 8466, 0 ] ), + entity( "↰", [ 8624, 0 ] ), + entity( "Ł", [ 321, 0 ] ), + entity( "≪", [ 8810, 0 ] ), + entity( "⤅", [ 10501, 0 ] ), + entity( "М", [ 1052, 0 ] ), + entity( " ", [ 8287, 0 ] ), + entity( "ℳ", [ 8499, 0 ] ), + entity( "𝔐", [ 120080, 0 ] ), + entity( "∓", [ 8723, 0 ] ), + entity( "𝕄", [ 120132, 0 ] ), + entity( "ℳ", [ 8499, 0 ] ), + entity( "Μ", [ 924, 0 ] ), + entity( "Њ", [ 1034, 0 ] ), + entity( "Ń", [ 323, 0 ] ), + entity( "Ň", [ 327, 0 ] ), + entity( "Ņ", [ 325, 0 ] ), + entity( "Н", [ 1053, 0 ] ), + entity( "​", [ 8203, 0 ] ), + entity( "​", [ 8203, 0 ] ), + entity( "​", [ 8203, 0 ] ), + entity( "​", [ 8203, 0 ] ), + entity( "≫", [ 8811, 0 ] ), + entity( "≪", [ 8810, 0 ] ), + entity( " ", [ 10, 0 ] ), + entity( "𝔑", [ 120081, 0 ] ), + entity( "⁠", [ 8288, 0 ] ), + entity( " ", [ 160, 0 ] ), + entity( "ℕ", [ 8469, 0 ] ), + entity( "⫬", [ 10988, 0 ] ), + entity( "≢", [ 8802, 0 ] ), + entity( "≭", [ 8813, 0 ] ), + entity( "∦", [ 8742, 0 ] ), + entity( "∉", [ 8713, 0 ] ), + entity( "≠", [ 8800, 0 ] ), + entity( "≂̸", [ 8770, 824 ] ), + entity( "∄", [ 8708, 0 ] ), + entity( "≯", [ 8815, 0 ] ), + entity( "≱", [ 8817, 0 ] ), + entity( "≧̸", [ 8807, 824 ] ), + entity( "≫̸", [ 8811, 824 ] ), + entity( "≹", [ 8825, 0 ] ), + entity( "⩾̸", [ 10878, 824 ] ), + entity( "≵", [ 8821, 0 ] ), + entity( "≎̸", [ 8782, 824 ] ), + entity( "≏̸", [ 8783, 
824 ] ), + entity( "⋪", [ 8938, 0 ] ), + entity( "⧏̸", [ 10703, 824 ] ), + entity( "⋬", [ 8940, 0 ] ), + entity( "≮", [ 8814, 0 ] ), + entity( "≰", [ 8816, 0 ] ), + entity( "≸", [ 8824, 0 ] ), + entity( "≪̸", [ 8810, 824 ] ), + entity( "⩽̸", [ 10877, 824 ] ), + entity( "≴", [ 8820, 0 ] ), + entity( "⪢̸", [ 10914, 824 ] ), + entity( "⪡̸", [ 10913, 824 ] ), + entity( "⊀", [ 8832, 0 ] ), + entity( "⪯̸", [ 10927, 824 ] ), + entity( "⋠", [ 8928, 0 ] ), + entity( "∌", [ 8716, 0 ] ), + entity( "⋫", [ 8939, 0 ] ), + entity( "⧐̸", [ 10704, 824 ] ), + entity( "⋭", [ 8941, 0 ] ), + entity( "⊏̸", [ 8847, 824 ] ), + entity( "⋢", [ 8930, 0 ] ), + entity( "⊐̸", [ 8848, 824 ] ), + entity( "⋣", [ 8931, 0 ] ), + entity( "⊂⃒", [ 8834, 8402 ] ), + entity( "⊈", [ 8840, 0 ] ), + entity( "⊁", [ 8833, 0 ] ), + entity( "⪰̸", [ 10928, 824 ] ), + entity( "⋡", [ 8929, 0 ] ), + entity( "≿̸", [ 8831, 824 ] ), + entity( "⊃⃒", [ 8835, 8402 ] ), + entity( "⊉", [ 8841, 0 ] ), + entity( "≁", [ 8769, 0 ] ), + entity( "≄", [ 8772, 0 ] ), + entity( "≇", [ 8775, 0 ] ), + entity( "≉", [ 8777, 0 ] ), + entity( "∤", [ 8740, 0 ] ), + entity( "𝒩", [ 119977, 0 ] ), + entity( "Ñ", [ 209, 0 ] ), + entity( "Ν", [ 925, 0 ] ), + entity( "Œ", [ 338, 0 ] ), + entity( "Ó", [ 211, 0 ] ), + entity( "Ô", [ 212, 0 ] ), + entity( "О", [ 1054, 0 ] ), + entity( "Ő", [ 336, 0 ] ), + entity( "𝔒", [ 120082, 0 ] ), + entity( "Ò", [ 210, 0 ] ), + entity( "Ō", [ 332, 0 ] ), + entity( "Ω", [ 937, 0 ] ), + entity( "Ο", [ 927, 0 ] ), + entity( "𝕆", [ 120134, 0 ] ), + entity( "“", [ 8220, 0 ] ), + entity( "‘", [ 8216, 0 ] ), + entity( "⩔", [ 10836, 0 ] ), + entity( "𝒪", [ 119978, 0 ] ), + entity( "Ø", [ 216, 0 ] ), + entity( "Õ", [ 213, 0 ] ), + entity( "⨷", [ 10807, 0 ] ), + entity( "Ö", [ 214, 0 ] ), + entity( "‾", [ 8254, 0 ] ), + entity( "⏞", [ 9182, 0 ] ), + entity( "⎴", [ 9140, 0 ] ), + entity( "⏜", [ 9180, 0 ] ), + entity( "∂", [ 8706, 0 ] ), + entity( "П", [ 1055, 0 ] ), + entity( "𝔓", [ 120083, 0 ] ), + entity( "Φ", [ 934, 0 
] ), + entity( "Π", [ 928, 0 ] ), + entity( "±", [ 177, 0 ] ), + entity( "ℌ", [ 8460, 0 ] ), + entity( "ℙ", [ 8473, 0 ] ), + entity( "⪻", [ 10939, 0 ] ), + entity( "≺", [ 8826, 0 ] ), + entity( "⪯", [ 10927, 0 ] ), + entity( "≼", [ 8828, 0 ] ), + entity( "≾", [ 8830, 0 ] ), + entity( "″", [ 8243, 0 ] ), + entity( "∏", [ 8719, 0 ] ), + entity( "∷", [ 8759, 0 ] ), + entity( "∝", [ 8733, 0 ] ), + entity( "𝒫", [ 119979, 0 ] ), + entity( "Ψ", [ 936, 0 ] ), + entity( """, [ 34, 0 ] ), + entity( "𝔔", [ 120084, 0 ] ), + entity( "ℚ", [ 8474, 0 ] ), + entity( "𝒬", [ 119980, 0 ] ), + entity( "⤐", [ 10512, 0 ] ), + entity( "®", [ 174, 0 ] ), + entity( "Ŕ", [ 340, 0 ] ), + entity( "⟫", [ 10219, 0 ] ), + entity( "↠", [ 8608, 0 ] ), + entity( "⤖", [ 10518, 0 ] ), + entity( "Ř", [ 344, 0 ] ), + entity( "Ŗ", [ 342, 0 ] ), + entity( "Р", [ 1056, 0 ] ), + entity( "ℜ", [ 8476, 0 ] ), + entity( "∋", [ 8715, 0 ] ), + entity( "⇋", [ 8651, 0 ] ), + entity( "⥯", [ 10607, 0 ] ), + entity( "ℜ", [ 8476, 0 ] ), + entity( "Ρ", [ 929, 0 ] ), + entity( "⟩", [ 10217, 0 ] ), + entity( "→", [ 8594, 0 ] ), + entity( "⇥", [ 8677, 0 ] ), + entity( "⇄", [ 8644, 0 ] ), + entity( "⌉", [ 8969, 0 ] ), + entity( "⟧", [ 10215, 0 ] ), + entity( "⥝", [ 10589, 0 ] ), + entity( "⇂", [ 8642, 0 ] ), + entity( "⥕", [ 10581, 0 ] ), + entity( "⌋", [ 8971, 0 ] ), + entity( "⊢", [ 8866, 0 ] ), + entity( "↦", [ 8614, 0 ] ), + entity( "⥛", [ 10587, 0 ] ), + entity( "⊳", [ 8883, 0 ] ), + entity( "⧐", [ 10704, 0 ] ), + entity( "⊵", [ 8885, 0 ] ), + entity( "⥏", [ 10575, 0 ] ), + entity( "⥜", [ 10588, 0 ] ), + entity( "↾", [ 8638, 0 ] ), + entity( "⥔", [ 10580, 0 ] ), + entity( "⇀", [ 8640, 0 ] ), + entity( "⥓", [ 10579, 0 ] ), + entity( "⇒", [ 8658, 0 ] ), + entity( "ℝ", [ 8477, 0 ] ), + entity( "⥰", [ 10608, 0 ] ), + entity( "⇛", [ 8667, 0 ] ), + entity( "ℛ", [ 8475, 0 ] ), + entity( "↱", [ 8625, 0 ] ), + entity( "⧴", [ 10740, 0 ] ), + entity( "Щ", [ 1065, 0 ] ), + entity( "Ш", [ 1064, 0 ] ), + entity( "Ь", [ 1068, 0 ] ), 
+ entity( "Ś", [ 346, 0 ] ), + entity( "⪼", [ 10940, 0 ] ), + entity( "Š", [ 352, 0 ] ), + entity( "Ş", [ 350, 0 ] ), + entity( "Ŝ", [ 348, 0 ] ), + entity( "С", [ 1057, 0 ] ), + entity( "𝔖", [ 120086, 0 ] ), + entity( "↓", [ 8595, 0 ] ), + entity( "←", [ 8592, 0 ] ), + entity( "→", [ 8594, 0 ] ), + entity( "↑", [ 8593, 0 ] ), + entity( "Σ", [ 931, 0 ] ), + entity( "∘", [ 8728, 0 ] ), + entity( "𝕊", [ 120138, 0 ] ), + entity( "√", [ 8730, 0 ] ), + entity( "□", [ 9633, 0 ] ), + entity( "⊓", [ 8851, 0 ] ), + entity( "⊏", [ 8847, 0 ] ), + entity( "⊑", [ 8849, 0 ] ), + entity( "⊐", [ 8848, 0 ] ), + entity( "⊒", [ 8850, 0 ] ), + entity( "⊔", [ 8852, 0 ] ), + entity( "𝒮", [ 119982, 0 ] ), + entity( "⋆", [ 8902, 0 ] ), + entity( "⋐", [ 8912, 0 ] ), + entity( "⋐", [ 8912, 0 ] ), + entity( "⊆", [ 8838, 0 ] ), + entity( "≻", [ 8827, 0 ] ), + entity( "⪰", [ 10928, 0 ] ), + entity( "≽", [ 8829, 0 ] ), + entity( "≿", [ 8831, 0 ] ), + entity( "∋", [ 8715, 0 ] ), + entity( "∑", [ 8721, 0 ] ), + entity( "⋑", [ 8913, 0 ] ), + entity( "⊃", [ 8835, 0 ] ), + entity( "⊇", [ 8839, 0 ] ), + entity( "⋑", [ 8913, 0 ] ), + entity( "Þ", [ 222, 0 ] ), + entity( "™", [ 8482, 0 ] ), + entity( "Ћ", [ 1035, 0 ] ), + entity( "Ц", [ 1062, 0 ] ), + entity( " ", [ 9, 0 ] ), + entity( "Τ", [ 932, 0 ] ), + entity( "Ť", [ 356, 0 ] ), + entity( "Ţ", [ 354, 0 ] ), + entity( "Т", [ 1058, 0 ] ), + entity( "𝔗", [ 120087, 0 ] ), + entity( "∴", [ 8756, 0 ] ), + entity( "Θ", [ 920, 0 ] ), + entity( "  ", [ 8287, 8202 ] ), + entity( " ", [ 8201, 0 ] ), + entity( "∼", [ 8764, 0 ] ), + entity( "≃", [ 8771, 0 ] ), + entity( "≅", [ 8773, 0 ] ), + entity( "≈", [ 8776, 0 ] ), + entity( "𝕋", [ 120139, 0 ] ), + entity( "⃛", [ 8411, 0 ] ), + entity( "𝒯", [ 119983, 0 ] ), + entity( "Ŧ", [ 358, 0 ] ), + entity( "Ú", [ 218, 0 ] ), + entity( "↟", [ 8607, 0 ] ), + entity( "⥉", [ 10569, 0 ] ), + entity( "Ў", [ 1038, 0 ] ), + entity( "Ŭ", [ 364, 0 ] ), + entity( "Û", [ 219, 0 ] ), + entity( "У", [ 1059, 0 ] ), + entity( "Ű", [ 
368, 0 ] ), + entity( "𝔘", [ 120088, 0 ] ), + entity( "Ù", [ 217, 0 ] ), + entity( "Ū", [ 362, 0 ] ), + entity( "_", [ 95, 0 ] ), + entity( "⏟", [ 9183, 0 ] ), + entity( "⎵", [ 9141, 0 ] ), + entity( "⏝", [ 9181, 0 ] ), + entity( "⋃", [ 8899, 0 ] ), + entity( "⊎", [ 8846, 0 ] ), + entity( "Ų", [ 370, 0 ] ), + entity( "𝕌", [ 120140, 0 ] ), + entity( "↑", [ 8593, 0 ] ), + entity( "⤒", [ 10514, 0 ] ), + entity( "⇅", [ 8645, 0 ] ), + entity( "↕", [ 8597, 0 ] ), + entity( "⥮", [ 10606, 0 ] ), + entity( "⊥", [ 8869, 0 ] ), + entity( "↥", [ 8613, 0 ] ), + entity( "⇑", [ 8657, 0 ] ), + entity( "⇕", [ 8661, 0 ] ), + entity( "↖", [ 8598, 0 ] ), + entity( "↗", [ 8599, 0 ] ), + entity( "ϒ", [ 978, 0 ] ), + entity( "Υ", [ 933, 0 ] ), + entity( "Ů", [ 366, 0 ] ), + entity( "𝒰", [ 119984, 0 ] ), + entity( "Ũ", [ 360, 0 ] ), + entity( "Ü", [ 220, 0 ] ), + entity( "⊫", [ 8875, 0 ] ), + entity( "⫫", [ 10987, 0 ] ), + entity( "В", [ 1042, 0 ] ), + entity( "⊩", [ 8873, 0 ] ), + entity( "⫦", [ 10982, 0 ] ), + entity( "⋁", [ 8897, 0 ] ), + entity( "‖", [ 8214, 0 ] ), + entity( "‖", [ 8214, 0 ] ), + entity( "∣", [ 8739, 0 ] ), + entity( "|", [ 124, 0 ] ), + entity( "❘", [ 10072, 0 ] ), + entity( "≀", [ 8768, 0 ] ), + entity( " ", [ 8202, 0 ] ), + entity( "𝔙", [ 120089, 0 ] ), + entity( "𝕍", [ 120141, 0 ] ), + entity( "𝒱", [ 119985, 0 ] ), + entity( "⊪", [ 8874, 0 ] ), + entity( "Ŵ", [ 372, 0 ] ), + entity( "⋀", [ 8896, 0 ] ), + entity( "𝔚", [ 120090, 0 ] ), + entity( "𝕎", [ 120142, 0 ] ), + entity( "𝒲", [ 119986, 0 ] ), + entity( "𝔛", [ 120091, 0 ] ), + entity( "Ξ", [ 926, 0 ] ), + entity( "𝕏", [ 120143, 0 ] ), + entity( "𝒳", [ 119987, 0 ] ), + entity( "Я", [ 1071, 0 ] ), + entity( "Ї", [ 1031, 0 ] ), + entity( "Ю", [ 1070, 0 ] ), + entity( "Ý", [ 221, 0 ] ), + entity( "Ŷ", [ 374, 0 ] ), + entity( "Ы", [ 1067, 0 ] ), + entity( "𝔜", [ 120092, 0 ] ), + entity( "𝕐", [ 120144, 0 ] ), + entity( "𝒴", [ 119988, 0 ] ), + entity( "Ÿ", [ 376, 0 ] ), + entity( "Ж", [ 1046, 0 ] ), + entity( "Ź", [ 
377, 0 ] ), + entity( "Ž", [ 381, 0 ] ), + entity( "З", [ 1047, 0 ] ), + entity( "Ż", [ 379, 0 ] ), + entity( "​", [ 8203, 0 ] ), + entity( "Ζ", [ 918, 0 ] ), + entity( "ℨ", [ 8488, 0 ] ), + entity( "ℤ", [ 8484, 0 ] ), + entity( "𝒵", [ 119989, 0 ] ), + entity( "á", [ 225, 0 ] ), + entity( "ă", [ 259, 0 ] ), + entity( "∾", [ 8766, 0 ] ), + entity( "∾̳", [ 8766, 819 ] ), + entity( "∿", [ 8767, 0 ] ), + entity( "â", [ 226, 0 ] ), + entity( "´", [ 180, 0 ] ), + entity( "а", [ 1072, 0 ] ), + entity( "æ", [ 230, 0 ] ), + entity( "⁡", [ 8289, 0 ] ), + entity( "𝔞", [ 120094, 0 ] ), + entity( "à", [ 224, 0 ] ), + entity( "ℵ", [ 8501, 0 ] ), + entity( "ℵ", [ 8501, 0 ] ), + entity( "α", [ 945, 0 ] ), + entity( "ā", [ 257, 0 ] ), + entity( "⨿", [ 10815, 0 ] ), + entity( "&", [ 38, 0 ] ), + entity( "∧", [ 8743, 0 ] ), + entity( "⩕", [ 10837, 0 ] ), + entity( "⩜", [ 10844, 0 ] ), + entity( "⩘", [ 10840, 0 ] ), + entity( "⩚", [ 10842, 0 ] ), + entity( "∠", [ 8736, 0 ] ), + entity( "⦤", [ 10660, 0 ] ), + entity( "∠", [ 8736, 0 ] ), + entity( "∡", [ 8737, 0 ] ), + entity( "⦨", [ 10664, 0 ] ), + entity( "⦩", [ 10665, 0 ] ), + entity( "⦪", [ 10666, 0 ] ), + entity( "⦫", [ 10667, 0 ] ), + entity( "⦬", [ 10668, 0 ] ), + entity( "⦭", [ 10669, 0 ] ), + entity( "⦮", [ 10670, 0 ] ), + entity( "⦯", [ 10671, 0 ] ), + entity( "∟", [ 8735, 0 ] ), + entity( "⊾", [ 8894, 0 ] ), + entity( "⦝", [ 10653, 0 ] ), + entity( "∢", [ 8738, 0 ] ), + entity( "Å", [ 197, 0 ] ), + entity( "⍼", [ 9084, 0 ] ), + entity( "ą", [ 261, 0 ] ), + entity( "𝕒", [ 120146, 0 ] ), + entity( "≈", [ 8776, 0 ] ), + entity( "⩰", [ 10864, 0 ] ), + entity( "⩯", [ 10863, 0 ] ), + entity( "≊", [ 8778, 0 ] ), + entity( "≋", [ 8779, 0 ] ), + entity( "'", [ 39, 0 ] ), + entity( "≈", [ 8776, 0 ] ), + entity( "≊", [ 8778, 0 ] ), + entity( "å", [ 229, 0 ] ), + entity( "𝒶", [ 119990, 0 ] ), + entity( "*", [ 42, 0 ] ), + entity( "≈", [ 8776, 0 ] ), + entity( "≍", [ 8781, 0 ] ), + entity( "ã", [ 227, 0 ] ), + entity( "ä", [ 228, 0 ] ), + 
entity( "∳", [ 8755, 0 ] ), + entity( "⨑", [ 10769, 0 ] ), + entity( "⫭", [ 10989, 0 ] ), + entity( "≌", [ 8780, 0 ] ), + entity( "϶", [ 1014, 0 ] ), + entity( "‵", [ 8245, 0 ] ), + entity( "∽", [ 8765, 0 ] ), + entity( "⋍", [ 8909, 0 ] ), + entity( "⊽", [ 8893, 0 ] ), + entity( "⌅", [ 8965, 0 ] ), + entity( "⌅", [ 8965, 0 ] ), + entity( "⎵", [ 9141, 0 ] ), + entity( "⎶", [ 9142, 0 ] ), + entity( "≌", [ 8780, 0 ] ), + entity( "б", [ 1073, 0 ] ), + entity( "„", [ 8222, 0 ] ), + entity( "∵", [ 8757, 0 ] ), + entity( "∵", [ 8757, 0 ] ), + entity( "⦰", [ 10672, 0 ] ), + entity( "϶", [ 1014, 0 ] ), + entity( "ℬ", [ 8492, 0 ] ), + entity( "β", [ 946, 0 ] ), + entity( "ℶ", [ 8502, 0 ] ), + entity( "≬", [ 8812, 0 ] ), + entity( "𝔟", [ 120095, 0 ] ), + entity( "⋂", [ 8898, 0 ] ), + entity( "◯", [ 9711, 0 ] ), + entity( "⋃", [ 8899, 0 ] ), + entity( "⨀", [ 10752, 0 ] ), + entity( "⨁", [ 10753, 0 ] ), + entity( "⨂", [ 10754, 0 ] ), + entity( "⨆", [ 10758, 0 ] ), + entity( "★", [ 9733, 0 ] ), + entity( "▽", [ 9661, 0 ] ), + entity( "△", [ 9651, 0 ] ), + entity( "⨄", [ 10756, 0 ] ), + entity( "⋁", [ 8897, 0 ] ), + entity( "⋀", [ 8896, 0 ] ), + entity( "⤍", [ 10509, 0 ] ), + entity( "⧫", [ 10731, 0 ] ), + entity( "▪", [ 9642, 0 ] ), + entity( "▴", [ 9652, 0 ] ), + entity( "▾", [ 9662, 0 ] ), + entity( "◂", [ 9666, 0 ] ), + entity( "▸", [ 9656, 0 ] ), + entity( "␣", [ 9251, 0 ] ), + entity( "▒", [ 9618, 0 ] ), + entity( "░", [ 9617, 0 ] ), + entity( "▓", [ 9619, 0 ] ), + entity( "█", [ 9608, 0 ] ), + entity( "=⃥", [ 61, 8421 ] ), + entity( "≡⃥", [ 8801, 8421 ] ), + entity( "⌐", [ 8976, 0 ] ), + entity( "𝕓", [ 120147, 0 ] ), + entity( "⊥", [ 8869, 0 ] ), + entity( "⊥", [ 8869, 0 ] ), + entity( "⋈", [ 8904, 0 ] ), + entity( "╗", [ 9559, 0 ] ), + entity( "╔", [ 9556, 0 ] ), + entity( "╖", [ 9558, 0 ] ), + entity( "╓", [ 9555, 0 ] ), + entity( "═", [ 9552, 0 ] ), + entity( "╦", [ 9574, 0 ] ), + entity( "╩", [ 9577, 0 ] ), + entity( "╤", [ 9572, 0 ] ), + entity( "╧", [ 9575, 0 ] ), + 
entity( "╝", [ 9565, 0 ] ), + entity( "╚", [ 9562, 0 ] ), + entity( "╜", [ 9564, 0 ] ), + entity( "╙", [ 9561, 0 ] ), + entity( "║", [ 9553, 0 ] ), + entity( "╬", [ 9580, 0 ] ), + entity( "╣", [ 9571, 0 ] ), + entity( "╠", [ 9568, 0 ] ), + entity( "╫", [ 9579, 0 ] ), + entity( "╢", [ 9570, 0 ] ), + entity( "╟", [ 9567, 0 ] ), + entity( "⧉", [ 10697, 0 ] ), + entity( "╕", [ 9557, 0 ] ), + entity( "╒", [ 9554, 0 ] ), + entity( "┐", [ 9488, 0 ] ), + entity( "┌", [ 9484, 0 ] ), + entity( "─", [ 9472, 0 ] ), + entity( "╥", [ 9573, 0 ] ), + entity( "╨", [ 9576, 0 ] ), + entity( "┬", [ 9516, 0 ] ), + entity( "┴", [ 9524, 0 ] ), + entity( "⊟", [ 8863, 0 ] ), + entity( "⊞", [ 8862, 0 ] ), + entity( "⊠", [ 8864, 0 ] ), + entity( "╛", [ 9563, 0 ] ), + entity( "╘", [ 9560, 0 ] ), + entity( "┘", [ 9496, 0 ] ), + entity( "└", [ 9492, 0 ] ), + entity( "│", [ 9474, 0 ] ), + entity( "╪", [ 9578, 0 ] ), + entity( "╡", [ 9569, 0 ] ), + entity( "╞", [ 9566, 0 ] ), + entity( "┼", [ 9532, 0 ] ), + entity( "┤", [ 9508, 0 ] ), + entity( "├", [ 9500, 0 ] ), + entity( "‵", [ 8245, 0 ] ), + entity( "˘", [ 728, 0 ] ), + entity( "¦", [ 166, 0 ] ), + entity( "𝒷", [ 119991, 0 ] ), + entity( "⁏", [ 8271, 0 ] ), + entity( "∽", [ 8765, 0 ] ), + entity( "⋍", [ 8909, 0 ] ), + entity( "\", [ 92, 0 ] ), + entity( "⧅", [ 10693, 0 ] ), + entity( "⟈", [ 10184, 0 ] ), + entity( "•", [ 8226, 0 ] ), + entity( "•", [ 8226, 0 ] ), + entity( "≎", [ 8782, 0 ] ), + entity( "⪮", [ 10926, 0 ] ), + entity( "≏", [ 8783, 0 ] ), + entity( "≏", [ 8783, 0 ] ), + entity( "ć", [ 263, 0 ] ), + entity( "∩", [ 8745, 0 ] ), + entity( "⩄", [ 10820, 0 ] ), + entity( "⩉", [ 10825, 0 ] ), + entity( "⩋", [ 10827, 0 ] ), + entity( "⩇", [ 10823, 0 ] ), + entity( "⩀", [ 10816, 0 ] ), + entity( "∩︀", [ 8745, 65024 ] ), + entity( "⁁", [ 8257, 0 ] ), + entity( "ˇ", [ 711, 0 ] ), + entity( "⩍", [ 10829, 0 ] ), + entity( "č", [ 269, 0 ] ), + entity( "ç", [ 231, 0 ] ), + entity( "ĉ", [ 265, 0 ] ), + entity( "⩌", [ 10828, 0 ] ), + entity( 
"⩐", [ 10832, 0 ] ), + entity( "ċ", [ 267, 0 ] ), + entity( "¸", [ 184, 0 ] ), + entity( "⦲", [ 10674, 0 ] ), + entity( "¢", [ 162, 0 ] ), + entity( "·", [ 183, 0 ] ), + entity( "𝔠", [ 120096, 0 ] ), + entity( "ч", [ 1095, 0 ] ), + entity( "✓", [ 10003, 0 ] ), + entity( "✓", [ 10003, 0 ] ), + entity( "χ", [ 967, 0 ] ), + entity( "○", [ 9675, 0 ] ), + entity( "⧃", [ 10691, 0 ] ), + entity( "ˆ", [ 710, 0 ] ), + entity( "≗", [ 8791, 0 ] ), + entity( "↺", [ 8634, 0 ] ), + entity( "↻", [ 8635, 0 ] ), + entity( "®", [ 174, 0 ] ), + entity( "Ⓢ", [ 9416, 0 ] ), + entity( "⊛", [ 8859, 0 ] ), + entity( "⊚", [ 8858, 0 ] ), + entity( "⊝", [ 8861, 0 ] ), + entity( "≗", [ 8791, 0 ] ), + entity( "⨐", [ 10768, 0 ] ), + entity( "⫯", [ 10991, 0 ] ), + entity( "⧂", [ 10690, 0 ] ), + entity( "♣", [ 9827, 0 ] ), + entity( "♣", [ 9827, 0 ] ), + entity( ":", [ 58, 0 ] ), + entity( "≔", [ 8788, 0 ] ), + entity( "≔", [ 8788, 0 ] ), + entity( ",", [ 44, 0 ] ), + entity( "@", [ 64, 0 ] ), + entity( "∁", [ 8705, 0 ] ), + entity( "∘", [ 8728, 0 ] ), + entity( "∁", [ 8705, 0 ] ), + entity( "ℂ", [ 8450, 0 ] ), + entity( "≅", [ 8773, 0 ] ), + entity( "⩭", [ 10861, 0 ] ), + entity( "∮", [ 8750, 0 ] ), + entity( "𝕔", [ 120148, 0 ] ), + entity( "∐", [ 8720, 0 ] ), + entity( "©", [ 169, 0 ] ), + entity( "℗", [ 8471, 0 ] ), + entity( "↵", [ 8629, 0 ] ), + entity( "✗", [ 10007, 0 ] ), + entity( "𝒸", [ 119992, 0 ] ), + entity( "⫏", [ 10959, 0 ] ), + entity( "⫑", [ 10961, 0 ] ), + entity( "⫐", [ 10960, 0 ] ), + entity( "⫒", [ 10962, 0 ] ), + entity( "⋯", [ 8943, 0 ] ), + entity( "⤸", [ 10552, 0 ] ), + entity( "⤵", [ 10549, 0 ] ), + entity( "⋞", [ 8926, 0 ] ), + entity( "⋟", [ 8927, 0 ] ), + entity( "↶", [ 8630, 0 ] ), + entity( "⤽", [ 10557, 0 ] ), + entity( "∪", [ 8746, 0 ] ), + entity( "⩈", [ 10824, 0 ] ), + entity( "⩆", [ 10822, 0 ] ), + entity( "⩊", [ 10826, 0 ] ), + entity( "⊍", [ 8845, 0 ] ), + entity( "⩅", [ 10821, 0 ] ), + entity( "∪︀", [ 8746, 65024 ] ), + entity( "↷", [ 8631, 0 ] ), + entity( 
"⤼", [ 10556, 0 ] ), + entity( "⋞", [ 8926, 0 ] ), + entity( "⋟", [ 8927, 0 ] ), + entity( "⋎", [ 8910, 0 ] ), + entity( "⋏", [ 8911, 0 ] ), + entity( "¤", [ 164, 0 ] ), + entity( "↶", [ 8630, 0 ] ), + entity( "↷", [ 8631, 0 ] ), + entity( "⋎", [ 8910, 0 ] ), + entity( "⋏", [ 8911, 0 ] ), + entity( "∲", [ 8754, 0 ] ), + entity( "∱", [ 8753, 0 ] ), + entity( "⌭", [ 9005, 0 ] ), + entity( "⇓", [ 8659, 0 ] ), + entity( "⥥", [ 10597, 0 ] ), + entity( "†", [ 8224, 0 ] ), + entity( "ℸ", [ 8504, 0 ] ), + entity( "↓", [ 8595, 0 ] ), + entity( "‐", [ 8208, 0 ] ), + entity( "⊣", [ 8867, 0 ] ), + entity( "⤏", [ 10511, 0 ] ), + entity( "˝", [ 733, 0 ] ), + entity( "ď", [ 271, 0 ] ), + entity( "д", [ 1076, 0 ] ), + entity( "ⅆ", [ 8518, 0 ] ), + entity( "‡", [ 8225, 0 ] ), + entity( "⇊", [ 8650, 0 ] ), + entity( "⩷", [ 10871, 0 ] ), + entity( "°", [ 176, 0 ] ), + entity( "δ", [ 948, 0 ] ), + entity( "⦱", [ 10673, 0 ] ), + entity( "⥿", [ 10623, 0 ] ), + entity( "𝔡", [ 120097, 0 ] ), + entity( "⇃", [ 8643, 0 ] ), + entity( "⇂", [ 8642, 0 ] ), + entity( "⋄", [ 8900, 0 ] ), + entity( "⋄", [ 8900, 0 ] ), + entity( "♦", [ 9830, 0 ] ), + entity( "♦", [ 9830, 0 ] ), + entity( "¨", [ 168, 0 ] ), + entity( "ϝ", [ 989, 0 ] ), + entity( "⋲", [ 8946, 0 ] ), + entity( "÷", [ 247, 0 ] ), + entity( "÷", [ 247, 0 ] ), + entity( "⋇", [ 8903, 0 ] ), + entity( "⋇", [ 8903, 0 ] ), + entity( "ђ", [ 1106, 0 ] ), + entity( "⌞", [ 8990, 0 ] ), + entity( "⌍", [ 8973, 0 ] ), + entity( "$", [ 36, 0 ] ), + entity( "𝕕", [ 120149, 0 ] ), + entity( "˙", [ 729, 0 ] ), + entity( "≐", [ 8784, 0 ] ), + entity( "≑", [ 8785, 0 ] ), + entity( "∸", [ 8760, 0 ] ), + entity( "∔", [ 8724, 0 ] ), + entity( "⊡", [ 8865, 0 ] ), + entity( "⌆", [ 8966, 0 ] ), + entity( "↓", [ 8595, 0 ] ), + entity( "⇊", [ 8650, 0 ] ), + entity( "⇃", [ 8643, 0 ] ), + entity( "⇂", [ 8642, 0 ] ), + entity( "⤐", [ 10512, 0 ] ), + entity( "⌟", [ 8991, 0 ] ), + entity( "⌌", [ 8972, 0 ] ), + entity( "𝒹", [ 119993, 0 ] ), + entity( "ѕ", [ 1109, 0 ] 
), + entity( "⧶", [ 10742, 0 ] ), + entity( "đ", [ 273, 0 ] ), + entity( "⋱", [ 8945, 0 ] ), + entity( "▿", [ 9663, 0 ] ), + entity( "▾", [ 9662, 0 ] ), + entity( "⇵", [ 8693, 0 ] ), + entity( "⥯", [ 10607, 0 ] ), + entity( "⦦", [ 10662, 0 ] ), + entity( "џ", [ 1119, 0 ] ), + entity( "⟿", [ 10239, 0 ] ), + entity( "⩷", [ 10871, 0 ] ), + entity( "≑", [ 8785, 0 ] ), + entity( "é", [ 233, 0 ] ), + entity( "⩮", [ 10862, 0 ] ), + entity( "ě", [ 283, 0 ] ), + entity( "≖", [ 8790, 0 ] ), + entity( "ê", [ 234, 0 ] ), + entity( "≕", [ 8789, 0 ] ), + entity( "э", [ 1101, 0 ] ), + entity( "ė", [ 279, 0 ] ), + entity( "ⅇ", [ 8519, 0 ] ), + entity( "≒", [ 8786, 0 ] ), + entity( "𝔢", [ 120098, 0 ] ), + entity( "⪚", [ 10906, 0 ] ), + entity( "è", [ 232, 0 ] ), + entity( "⪖", [ 10902, 0 ] ), + entity( "⪘", [ 10904, 0 ] ), + entity( "⪙", [ 10905, 0 ] ), + entity( "⏧", [ 9191, 0 ] ), + entity( "ℓ", [ 8467, 0 ] ), + entity( "⪕", [ 10901, 0 ] ), + entity( "⪗", [ 10903, 0 ] ), + entity( "ē", [ 275, 0 ] ), + entity( "∅", [ 8709, 0 ] ), + entity( "∅", [ 8709, 0 ] ), + entity( "∅", [ 8709, 0 ] ), + entity( " ", [ 8196, 0 ] ), + entity( " ", [ 8197, 0 ] ), + entity( " ", [ 8195, 0 ] ), + entity( "ŋ", [ 331, 0 ] ), + entity( " ", [ 8194, 0 ] ), + entity( "ę", [ 281, 0 ] ), + entity( "𝕖", [ 120150, 0 ] ), + entity( "⋕", [ 8917, 0 ] ), + entity( "⧣", [ 10723, 0 ] ), + entity( "⩱", [ 10865, 0 ] ), + entity( "ε", [ 949, 0 ] ), + entity( "ε", [ 949, 0 ] ), + entity( "ϵ", [ 1013, 0 ] ), + entity( "≖", [ 8790, 0 ] ), + entity( "≕", [ 8789, 0 ] ), + entity( "≂", [ 8770, 0 ] ), + entity( "⪖", [ 10902, 0 ] ), + entity( "⪕", [ 10901, 0 ] ), + entity( "=", [ 61, 0 ] ), + entity( "≟", [ 8799, 0 ] ), + entity( "≡", [ 8801, 0 ] ), + entity( "⩸", [ 10872, 0 ] ), + entity( "⧥", [ 10725, 0 ] ), + entity( "≓", [ 8787, 0 ] ), + entity( "⥱", [ 10609, 0 ] ), + entity( "ℯ", [ 8495, 0 ] ), + entity( "≐", [ 8784, 0 ] ), + entity( "≂", [ 8770, 0 ] ), + entity( "η", [ 951, 0 ] ), + entity( "ð", [ 240, 0 ] ), + 
entity( "ë", [ 235, 0 ] ), + entity( "€", [ 8364, 0 ] ), + entity( "!", [ 33, 0 ] ), + entity( "∃", [ 8707, 0 ] ), + entity( "ℰ", [ 8496, 0 ] ), + entity( "ⅇ", [ 8519, 0 ] ), + entity( "≒", [ 8786, 0 ] ), + entity( "ф", [ 1092, 0 ] ), + entity( "♀", [ 9792, 0 ] ), + entity( "ffi", [ 64259, 0 ] ), + entity( "ff", [ 64256, 0 ] ), + entity( "ffl", [ 64260, 0 ] ), + entity( "𝔣", [ 120099, 0 ] ), + entity( "fi", [ 64257, 0 ] ), + entity( "fj", [ 102, 106 ] ), + entity( "♭", [ 9837, 0 ] ), + entity( "fl", [ 64258, 0 ] ), + entity( "▱", [ 9649, 0 ] ), + entity( "ƒ", [ 402, 0 ] ), + entity( "𝕗", [ 120151, 0 ] ), + entity( "∀", [ 8704, 0 ] ), + entity( "⋔", [ 8916, 0 ] ), + entity( "⫙", [ 10969, 0 ] ), + entity( "⨍", [ 10765, 0 ] ), + entity( "½", [ 189, 0 ] ), + entity( "½", [ 189, 0 ] ), + entity( "⅓", [ 8531, 0 ] ), + entity( "¼", [ 188, 0 ] ), + entity( "¼", [ 188, 0 ] ), + entity( "⅕", [ 8533, 0 ] ), + entity( "⅙", [ 8537, 0 ] ), + entity( "⅛", [ 8539, 0 ] ), + entity( "⅔", [ 8532, 0 ] ), + entity( "⅖", [ 8534, 0 ] ), + entity( "¾", [ 190, 0 ] ), + entity( "¾", [ 190, 0 ] ), + entity( "⅗", [ 8535, 0 ] ), + entity( "⅜", [ 8540, 0 ] ), + entity( "⅘", [ 8536, 0 ] ), + entity( "⅚", [ 8538, 0 ] ), + entity( "⅝", [ 8541, 0 ] ), + entity( "⅞", [ 8542, 0 ] ), + entity( "⁄", [ 8260, 0 ] ), + entity( "⌢", [ 8994, 0 ] ), + entity( "𝒻", [ 119995, 0 ] ), + entity( "≧", [ 8807, 0 ] ), + entity( "⪌", [ 10892, 0 ] ), + entity( "ǵ", [ 501, 0 ] ), + entity( "γ", [ 947, 0 ] ), + entity( "ϝ", [ 989, 0 ] ), + entity( "⪆", [ 10886, 0 ] ), + entity( "ğ", [ 287, 0 ] ), + entity( "ĝ", [ 285, 0 ] ), + entity( "г", [ 1075, 0 ] ), + entity( "ġ", [ 289, 0 ] ), + entity( "≥", [ 8805, 0 ] ), + entity( "⋛", [ 8923, 0 ] ), + entity( "≥", [ 8805, 0 ] ), + entity( "≧", [ 8807, 0 ] ), + entity( "⩾", [ 10878, 0 ] ), + entity( "⩾", [ 10878, 0 ] ), + entity( "⪩", [ 10921, 0 ] ), + entity( "⪀", [ 10880, 0 ] ), + entity( "⪂", [ 10882, 0 ] ), + entity( "⪄", [ 10884, 0 ] ), + entity( "⋛︀", [ 8923, 65024 ] ), + 
entity( "⪔", [ 10900, 0 ] ), + entity( "𝔤", [ 120100, 0 ] ), + entity( "≫", [ 8811, 0 ] ), + entity( "⋙", [ 8921, 0 ] ), + entity( "ℷ", [ 8503, 0 ] ), + entity( "ѓ", [ 1107, 0 ] ), + entity( "≷", [ 8823, 0 ] ), + entity( "⪒", [ 10898, 0 ] ), + entity( "⪥", [ 10917, 0 ] ), + entity( "⪤", [ 10916, 0 ] ), + entity( "≩", [ 8809, 0 ] ), + entity( "⪊", [ 10890, 0 ] ), + entity( "⪊", [ 10890, 0 ] ), + entity( "⪈", [ 10888, 0 ] ), + entity( "⪈", [ 10888, 0 ] ), + entity( "≩", [ 8809, 0 ] ), + entity( "⋧", [ 8935, 0 ] ), + entity( "𝕘", [ 120152, 0 ] ), + entity( "`", [ 96, 0 ] ), + entity( "ℊ", [ 8458, 0 ] ), + entity( "≳", [ 8819, 0 ] ), + entity( "⪎", [ 10894, 0 ] ), + entity( "⪐", [ 10896, 0 ] ), + entity( ">", [ 62, 0 ] ), + entity( "⪧", [ 10919, 0 ] ), + entity( "⩺", [ 10874, 0 ] ), + entity( "⋗", [ 8919, 0 ] ), + entity( "⦕", [ 10645, 0 ] ), + entity( "⩼", [ 10876, 0 ] ), + entity( "⪆", [ 10886, 0 ] ), + entity( "⥸", [ 10616, 0 ] ), + entity( "⋗", [ 8919, 0 ] ), + entity( "⋛", [ 8923, 0 ] ), + entity( "⪌", [ 10892, 0 ] ), + entity( "≷", [ 8823, 0 ] ), + entity( "≳", [ 8819, 0 ] ), + entity( "≩︀", [ 8809, 65024 ] ), + entity( "≩︀", [ 8809, 65024 ] ), + entity( "⇔", [ 8660, 0 ] ), + entity( " ", [ 8202, 0 ] ), + entity( "½", [ 189, 0 ] ), + entity( "ℋ", [ 8459, 0 ] ), + entity( "ъ", [ 1098, 0 ] ), + entity( "↔", [ 8596, 0 ] ), + entity( "⥈", [ 10568, 0 ] ), + entity( "↭", [ 8621, 0 ] ), + entity( "ℏ", [ 8463, 0 ] ), + entity( "ĥ", [ 293, 0 ] ), + entity( "♥", [ 9829, 0 ] ), + entity( "♥", [ 9829, 0 ] ), + entity( "…", [ 8230, 0 ] ), + entity( "⊹", [ 8889, 0 ] ), + entity( "𝔥", [ 120101, 0 ] ), + entity( "⤥", [ 10533, 0 ] ), + entity( "⤦", [ 10534, 0 ] ), + entity( "⇿", [ 8703, 0 ] ), + entity( "∻", [ 8763, 0 ] ), + entity( "↩", [ 8617, 0 ] ), + entity( "↪", [ 8618, 0 ] ), + entity( "𝕙", [ 120153, 0 ] ), + entity( "―", [ 8213, 0 ] ), + entity( "𝒽", [ 119997, 0 ] ), + entity( "ℏ", [ 8463, 0 ] ), + entity( "ħ", [ 295, 0 ] ), + entity( "⁃", [ 8259, 0 ] ), + entity( "‐", [ 
8208, 0 ] ), + entity( "í", [ 237, 0 ] ), + entity( "⁣", [ 8291, 0 ] ), + entity( "î", [ 238, 0 ] ), + entity( "и", [ 1080, 0 ] ), + entity( "е", [ 1077, 0 ] ), + entity( "¡", [ 161, 0 ] ), + entity( "⇔", [ 8660, 0 ] ), + entity( "𝔦", [ 120102, 0 ] ), + entity( "ì", [ 236, 0 ] ), + entity( "ⅈ", [ 8520, 0 ] ), + entity( "⨌", [ 10764, 0 ] ), + entity( "∭", [ 8749, 0 ] ), + entity( "⧜", [ 10716, 0 ] ), + entity( "℩", [ 8489, 0 ] ), + entity( "ij", [ 307, 0 ] ), + entity( "ī", [ 299, 0 ] ), + entity( "ℑ", [ 8465, 0 ] ), + entity( "ℐ", [ 8464, 0 ] ), + entity( "ℑ", [ 8465, 0 ] ), + entity( "ı", [ 305, 0 ] ), + entity( "⊷", [ 8887, 0 ] ), + entity( "Ƶ", [ 437, 0 ] ), + entity( "∈", [ 8712, 0 ] ), + entity( "℅", [ 8453, 0 ] ), + entity( "∞", [ 8734, 0 ] ), + entity( "⧝", [ 10717, 0 ] ), + entity( "ı", [ 305, 0 ] ), + entity( "∫", [ 8747, 0 ] ), + entity( "⊺", [ 8890, 0 ] ), + entity( "ℤ", [ 8484, 0 ] ), + entity( "⊺", [ 8890, 0 ] ), + entity( "⨗", [ 10775, 0 ] ), + entity( "⨼", [ 10812, 0 ] ), + entity( "ё", [ 1105, 0 ] ), + entity( "į", [ 303, 0 ] ), + entity( "𝕚", [ 120154, 0 ] ), + entity( "ι", [ 953, 0 ] ), + entity( "⨼", [ 10812, 0 ] ), + entity( "¿", [ 191, 0 ] ), + entity( "𝒾", [ 119998, 0 ] ), + entity( "∈", [ 8712, 0 ] ), + entity( "⋹", [ 8953, 0 ] ), + entity( "⋵", [ 8949, 0 ] ), + entity( "⋴", [ 8948, 0 ] ), + entity( "⋳", [ 8947, 0 ] ), + entity( "∈", [ 8712, 0 ] ), + entity( "⁢", [ 8290, 0 ] ), + entity( "ĩ", [ 297, 0 ] ), + entity( "і", [ 1110, 0 ] ), + entity( "ï", [ 239, 0 ] ), + entity( "ĵ", [ 309, 0 ] ), + entity( "й", [ 1081, 0 ] ), + entity( "𝔧", [ 120103, 0 ] ), + entity( "ȷ", [ 567, 0 ] ), + entity( "𝕛", [ 120155, 0 ] ), + entity( "𝒿", [ 119999, 0 ] ), + entity( "ј", [ 1112, 0 ] ), + entity( "є", [ 1108, 0 ] ), + entity( "κ", [ 954, 0 ] ), + entity( "ϰ", [ 1008, 0 ] ), + entity( "ķ", [ 311, 0 ] ), + entity( "к", [ 1082, 0 ] ), + entity( "𝔨", [ 120104, 0 ] ), + entity( "ĸ", [ 312, 0 ] ), + entity( "х", [ 1093, 0 ] ), + entity( "ќ", [ 1116, 0 ] ), + 
entity( "𝕜", [ 120156, 0 ] ), + entity( "𝓀", [ 120000, 0 ] ), + entity( "⇚", [ 8666, 0 ] ), + entity( "⇐", [ 8656, 0 ] ), + entity( "⤛", [ 10523, 0 ] ), + entity( "⤎", [ 10510, 0 ] ), + entity( "≦", [ 8806, 0 ] ), + entity( "⪋", [ 10891, 0 ] ), + entity( "⥢", [ 10594, 0 ] ), + entity( "ĺ", [ 314, 0 ] ), + entity( "⦴", [ 10676, 0 ] ), + entity( "ℒ", [ 8466, 0 ] ), + entity( "λ", [ 955, 0 ] ), + entity( "⟨", [ 10216, 0 ] ), + entity( "⦑", [ 10641, 0 ] ), + entity( "⟨", [ 10216, 0 ] ), + entity( "⪅", [ 10885, 0 ] ), + entity( "«", [ 171, 0 ] ), + entity( "←", [ 8592, 0 ] ), + entity( "⇤", [ 8676, 0 ] ), + entity( "⤟", [ 10527, 0 ] ), + entity( "⤝", [ 10525, 0 ] ), + entity( "↩", [ 8617, 0 ] ), + entity( "↫", [ 8619, 0 ] ), + entity( "⤹", [ 10553, 0 ] ), + entity( "⥳", [ 10611, 0 ] ), + entity( "↢", [ 8610, 0 ] ), + entity( "⪫", [ 10923, 0 ] ), + entity( "⤙", [ 10521, 0 ] ), + entity( "⪭", [ 10925, 0 ] ), + entity( "⪭︀", [ 10925, 65024 ] ), + entity( "⤌", [ 10508, 0 ] ), + entity( "❲", [ 10098, 0 ] ), + entity( "{", [ 123, 0 ] ), + entity( "[", [ 91, 0 ] ), + entity( "⦋", [ 10635, 0 ] ), + entity( "⦏", [ 10639, 0 ] ), + entity( "⦍", [ 10637, 0 ] ), + entity( "ľ", [ 318, 0 ] ), + entity( "ļ", [ 316, 0 ] ), + entity( "⌈", [ 8968, 0 ] ), + entity( "{", [ 123, 0 ] ), + entity( "л", [ 1083, 0 ] ), + entity( "⤶", [ 10550, 0 ] ), + entity( "“", [ 8220, 0 ] ), + entity( "„", [ 8222, 0 ] ), + entity( "⥧", [ 10599, 0 ] ), + entity( "⥋", [ 10571, 0 ] ), + entity( "↲", [ 8626, 0 ] ), + entity( "≤", [ 8804, 0 ] ), + entity( "←", [ 8592, 0 ] ), + entity( "↢", [ 8610, 0 ] ), + entity( "↽", [ 8637, 0 ] ), + entity( "↼", [ 8636, 0 ] ), + entity( "⇇", [ 8647, 0 ] ), + entity( "↔", [ 8596, 0 ] ), + entity( "⇆", [ 8646, 0 ] ), + entity( "⇋", [ 8651, 0 ] ), + entity( "↭", [ 8621, 0 ] ), + entity( "⋋", [ 8907, 0 ] ), + entity( "⋚", [ 8922, 0 ] ), + entity( "≤", [ 8804, 0 ] ), + entity( "≦", [ 8806, 0 ] ), + entity( "⩽", [ 10877, 0 ] ), + entity( "⩽", [ 10877, 0 ] ), + entity( "⪨", [ 10920, 
0 ] ), + entity( "⩿", [ 10879, 0 ] ), + entity( "⪁", [ 10881, 0 ] ), + entity( "⪃", [ 10883, 0 ] ), + entity( "⋚︀", [ 8922, 65024 ] ), + entity( "⪓", [ 10899, 0 ] ), + entity( "⪅", [ 10885, 0 ] ), + entity( "⋖", [ 8918, 0 ] ), + entity( "⋚", [ 8922, 0 ] ), + entity( "⪋", [ 10891, 0 ] ), + entity( "≶", [ 8822, 0 ] ), + entity( "≲", [ 8818, 0 ] ), + entity( "⥼", [ 10620, 0 ] ), + entity( "⌊", [ 8970, 0 ] ), + entity( "𝔩", [ 120105, 0 ] ), + entity( "≶", [ 8822, 0 ] ), + entity( "⪑", [ 10897, 0 ] ), + entity( "↽", [ 8637, 0 ] ), + entity( "↼", [ 8636, 0 ] ), + entity( "⥪", [ 10602, 0 ] ), + entity( "▄", [ 9604, 0 ] ), + entity( "љ", [ 1113, 0 ] ), + entity( "≪", [ 8810, 0 ] ), + entity( "⇇", [ 8647, 0 ] ), + entity( "⌞", [ 8990, 0 ] ), + entity( "⥫", [ 10603, 0 ] ), + entity( "◺", [ 9722, 0 ] ), + entity( "ŀ", [ 320, 0 ] ), + entity( "⎰", [ 9136, 0 ] ), + entity( "⎰", [ 9136, 0 ] ), + entity( "≨", [ 8808, 0 ] ), + entity( "⪉", [ 10889, 0 ] ), + entity( "⪉", [ 10889, 0 ] ), + entity( "⪇", [ 10887, 0 ] ), + entity( "⪇", [ 10887, 0 ] ), + entity( "≨", [ 8808, 0 ] ), + entity( "⋦", [ 8934, 0 ] ), + entity( "⟬", [ 10220, 0 ] ), + entity( "⇽", [ 8701, 0 ] ), + entity( "⟦", [ 10214, 0 ] ), + entity( "⟵", [ 10229, 0 ] ), + entity( "⟷", [ 10231, 0 ] ), + entity( "⟼", [ 10236, 0 ] ), + entity( "⟶", [ 10230, 0 ] ), + entity( "↫", [ 8619, 0 ] ), + entity( "↬", [ 8620, 0 ] ), + entity( "⦅", [ 10629, 0 ] ), + entity( "𝕝", [ 120157, 0 ] ), + entity( "⨭", [ 10797, 0 ] ), + entity( "⨴", [ 10804, 0 ] ), + entity( "∗", [ 8727, 0 ] ), + entity( "_", [ 95, 0 ] ), + entity( "◊", [ 9674, 0 ] ), + entity( "◊", [ 9674, 0 ] ), + entity( "⧫", [ 10731, 0 ] ), + entity( "(", [ 40, 0 ] ), + entity( "⦓", [ 10643, 0 ] ), + entity( "⇆", [ 8646, 0 ] ), + entity( "⌟", [ 8991, 0 ] ), + entity( "⇋", [ 8651, 0 ] ), + entity( "⥭", [ 10605, 0 ] ), + entity( "‎", [ 8206, 0 ] ), + entity( "⊿", [ 8895, 0 ] ), + entity( "‹", [ 8249, 0 ] ), + entity( "𝓁", [ 120001, 0 ] ), + entity( "↰", [ 8624, 0 ] ), + entity( 
"≲", [ 8818, 0 ] ), + entity( "⪍", [ 10893, 0 ] ), + entity( "⪏", [ 10895, 0 ] ), + entity( "[", [ 91, 0 ] ), + entity( "‘", [ 8216, 0 ] ), + entity( "‚", [ 8218, 0 ] ), + entity( "ł", [ 322, 0 ] ), + entity( "<", [ 60, 0 ] ), + entity( "⪦", [ 10918, 0 ] ), + entity( "⩹", [ 10873, 0 ] ), + entity( "⋖", [ 8918, 0 ] ), + entity( "⋋", [ 8907, 0 ] ), + entity( "⋉", [ 8905, 0 ] ), + entity( "⥶", [ 10614, 0 ] ), + entity( "⩻", [ 10875, 0 ] ), + entity( "⦖", [ 10646, 0 ] ), + entity( "◃", [ 9667, 0 ] ), + entity( "⊴", [ 8884, 0 ] ), + entity( "◂", [ 9666, 0 ] ), + entity( "⥊", [ 10570, 0 ] ), + entity( "⥦", [ 10598, 0 ] ), + entity( "≨︀", [ 8808, 65024 ] ), + entity( "≨︀", [ 8808, 65024 ] ), + entity( "∺", [ 8762, 0 ] ), + entity( "¯", [ 175, 0 ] ), + entity( "♂", [ 9794, 0 ] ), + entity( "✠", [ 10016, 0 ] ), + entity( "✠", [ 10016, 0 ] ), + entity( "↦", [ 8614, 0 ] ), + entity( "↦", [ 8614, 0 ] ), + entity( "↧", [ 8615, 0 ] ), + entity( "↤", [ 8612, 0 ] ), + entity( "↥", [ 8613, 0 ] ), + entity( "▮", [ 9646, 0 ] ), + entity( "⨩", [ 10793, 0 ] ), + entity( "м", [ 1084, 0 ] ), + entity( "—", [ 8212, 0 ] ), + entity( "∡", [ 8737, 0 ] ), + entity( "𝔪", [ 120106, 0 ] ), + entity( "℧", [ 8487, 0 ] ), + entity( "µ", [ 181, 0 ] ), + entity( "∣", [ 8739, 0 ] ), + entity( "*", [ 42, 0 ] ), + entity( "⫰", [ 10992, 0 ] ), + entity( "·", [ 183, 0 ] ), + entity( "−", [ 8722, 0 ] ), + entity( "⊟", [ 8863, 0 ] ), + entity( "∸", [ 8760, 0 ] ), + entity( "⨪", [ 10794, 0 ] ), + entity( "⫛", [ 10971, 0 ] ), + entity( "…", [ 8230, 0 ] ), + entity( "∓", [ 8723, 0 ] ), + entity( "⊧", [ 8871, 0 ] ), + entity( "𝕞", [ 120158, 0 ] ), + entity( "∓", [ 8723, 0 ] ), + entity( "𝓂", [ 120002, 0 ] ), + entity( "∾", [ 8766, 0 ] ), + entity( "μ", [ 956, 0 ] ), + entity( "⊸", [ 8888, 0 ] ), + entity( "⊸", [ 8888, 0 ] ), + entity( "⋙̸", [ 8921, 824 ] ), + entity( "≫⃒", [ 8811, 8402 ] ), + entity( "≫̸", [ 8811, 824 ] ), + entity( "⇍", [ 8653, 0 ] ), + entity( "⇎", [ 8654, 0 ] ), + entity( "⋘̸", [ 8920, 824 ] 
), + entity( "≪⃒", [ 8810, 8402 ] ), + entity( "≪̸", [ 8810, 824 ] ), + entity( "⇏", [ 8655, 0 ] ), + entity( "⊯", [ 8879, 0 ] ), + entity( "⊮", [ 8878, 0 ] ), + entity( "∇", [ 8711, 0 ] ), + entity( "ń", [ 324, 0 ] ), + entity( "∠⃒", [ 8736, 8402 ] ), + entity( "≉", [ 8777, 0 ] ), + entity( "⩰̸", [ 10864, 824 ] ), + entity( "≋̸", [ 8779, 824 ] ), + entity( "ʼn", [ 329, 0 ] ), + entity( "≉", [ 8777, 0 ] ), + entity( "♮", [ 9838, 0 ] ), + entity( "♮", [ 9838, 0 ] ), + entity( "ℕ", [ 8469, 0 ] ), + entity( " ", [ 160, 0 ] ), + entity( "≎̸", [ 8782, 824 ] ), + entity( "≏̸", [ 8783, 824 ] ), + entity( "⩃", [ 10819, 0 ] ), + entity( "ň", [ 328, 0 ] ), + entity( "ņ", [ 326, 0 ] ), + entity( "≇", [ 8775, 0 ] ), + entity( "⩭̸", [ 10861, 824 ] ), + entity( "⩂", [ 10818, 0 ] ), + entity( "н", [ 1085, 0 ] ), + entity( "–", [ 8211, 0 ] ), + entity( "≠", [ 8800, 0 ] ), + entity( "⇗", [ 8663, 0 ] ), + entity( "⤤", [ 10532, 0 ] ), + entity( "↗", [ 8599, 0 ] ), + entity( "↗", [ 8599, 0 ] ), + entity( "≐̸", [ 8784, 824 ] ), + entity( "≢", [ 8802, 0 ] ), + entity( "⤨", [ 10536, 0 ] ), + entity( "≂̸", [ 8770, 824 ] ), + entity( "∄", [ 8708, 0 ] ), + entity( "∄", [ 8708, 0 ] ), + entity( "𝔫", [ 120107, 0 ] ), + entity( "≧̸", [ 8807, 824 ] ), + entity( "≱", [ 8817, 0 ] ), + entity( "≱", [ 8817, 0 ] ), + entity( "≧̸", [ 8807, 824 ] ), + entity( "⩾̸", [ 10878, 824 ] ), + entity( "⩾̸", [ 10878, 824 ] ), + entity( "≵", [ 8821, 0 ] ), + entity( "≯", [ 8815, 0 ] ), + entity( "≯", [ 8815, 0 ] ), + entity( "⇎", [ 8654, 0 ] ), + entity( "↮", [ 8622, 0 ] ), + entity( "⫲", [ 10994, 0 ] ), + entity( "∋", [ 8715, 0 ] ), + entity( "⋼", [ 8956, 0 ] ), + entity( "⋺", [ 8954, 0 ] ), + entity( "∋", [ 8715, 0 ] ), + entity( "њ", [ 1114, 0 ] ), + entity( "⇍", [ 8653, 0 ] ), + entity( "≦̸", [ 8806, 824 ] ), + entity( "↚", [ 8602, 0 ] ), + entity( "‥", [ 8229, 0 ] ), + entity( "≰", [ 8816, 0 ] ), + entity( "↚", [ 8602, 0 ] ), + entity( "↮", [ 8622, 0 ] ), + entity( "≰", [ 8816, 0 ] ), + entity( "≦̸", [ 
8806, 824 ] ), + entity( "⩽̸", [ 10877, 824 ] ), + entity( "⩽̸", [ 10877, 824 ] ), + entity( "≮", [ 8814, 0 ] ), + entity( "≴", [ 8820, 0 ] ), + entity( "≮", [ 8814, 0 ] ), + entity( "⋪", [ 8938, 0 ] ), + entity( "⋬", [ 8940, 0 ] ), + entity( "∤", [ 8740, 0 ] ), + entity( "𝕟", [ 120159, 0 ] ), + entity( "¬", [ 172, 0 ] ), + entity( "∉", [ 8713, 0 ] ), + entity( "⋹̸", [ 8953, 824 ] ), + entity( "⋵̸", [ 8949, 824 ] ), + entity( "∉", [ 8713, 0 ] ), + entity( "⋷", [ 8951, 0 ] ), + entity( "⋶", [ 8950, 0 ] ), + entity( "∌", [ 8716, 0 ] ), + entity( "∌", [ 8716, 0 ] ), + entity( "⋾", [ 8958, 0 ] ), + entity( "⋽", [ 8957, 0 ] ), + entity( "∦", [ 8742, 0 ] ), + entity( "∦", [ 8742, 0 ] ), + entity( "⫽⃥", [ 11005, 8421 ] ), + entity( "∂̸", [ 8706, 824 ] ), + entity( "⨔", [ 10772, 0 ] ), + entity( "⊀", [ 8832, 0 ] ), + entity( "⋠", [ 8928, 0 ] ), + entity( "⪯̸", [ 10927, 824 ] ), + entity( "⊀", [ 8832, 0 ] ), + entity( "⪯̸", [ 10927, 824 ] ), + entity( "⇏", [ 8655, 0 ] ), + entity( "↛", [ 8603, 0 ] ), + entity( "⤳̸", [ 10547, 824 ] ), + entity( "↝̸", [ 8605, 824 ] ), + entity( "↛", [ 8603, 0 ] ), + entity( "⋫", [ 8939, 0 ] ), + entity( "⋭", [ 8941, 0 ] ), + entity( "⊁", [ 8833, 0 ] ), + entity( "⋡", [ 8929, 0 ] ), + entity( "⪰̸", [ 10928, 824 ] ), + entity( "𝓃", [ 120003, 0 ] ), + entity( "∤", [ 8740, 0 ] ), + entity( "∦", [ 8742, 0 ] ), + entity( "≁", [ 8769, 0 ] ), + entity( "≄", [ 8772, 0 ] ), + entity( "≄", [ 8772, 0 ] ), + entity( "∤", [ 8740, 0 ] ), + entity( "∦", [ 8742, 0 ] ), + entity( "⋢", [ 8930, 0 ] ), + entity( "⋣", [ 8931, 0 ] ), + entity( "⊄", [ 8836, 0 ] ), + entity( "⫅̸", [ 10949, 824 ] ), + entity( "⊈", [ 8840, 0 ] ), + entity( "⊂⃒", [ 8834, 8402 ] ), + entity( "⊈", [ 8840, 0 ] ), + entity( "⫅̸", [ 10949, 824 ] ), + entity( "⊁", [ 8833, 0 ] ), + entity( "⪰̸", [ 10928, 824 ] ), + entity( "⊅", [ 8837, 0 ] ), + entity( "⫆̸", [ 10950, 824 ] ), + entity( "⊉", [ 8841, 0 ] ), + entity( "⊃⃒", [ 8835, 8402 ] ), + entity( "⊉", [ 8841, 0 ] ), + entity( "⫆̸", [ 10950, 
824 ] ), + entity( "≹", [ 8825, 0 ] ), + entity( "ñ", [ 241, 0 ] ), + entity( "≸", [ 8824, 0 ] ), + entity( "⋪", [ 8938, 0 ] ), + entity( "⋬", [ 8940, 0 ] ), + entity( "⋫", [ 8939, 0 ] ), + entity( "⋭", [ 8941, 0 ] ), + entity( "ν", [ 957, 0 ] ), + entity( "#", [ 35, 0 ] ), + entity( "№", [ 8470, 0 ] ), + entity( " ", [ 8199, 0 ] ), + entity( "⊭", [ 8877, 0 ] ), + entity( "⤄", [ 10500, 0 ] ), + entity( "≍⃒", [ 8781, 8402 ] ), + entity( "⊬", [ 8876, 0 ] ), + entity( "≥⃒", [ 8805, 8402 ] ), + entity( ">⃒", [ 62, 8402 ] ), + entity( "⧞", [ 10718, 0 ] ), + entity( "⤂", [ 10498, 0 ] ), + entity( "≤⃒", [ 8804, 8402 ] ), + entity( "<⃒", [ 60, 8402 ] ), + entity( "⊴⃒", [ 8884, 8402 ] ), + entity( "⤃", [ 10499, 0 ] ), + entity( "⊵⃒", [ 8885, 8402 ] ), + entity( "∼⃒", [ 8764, 8402 ] ), + entity( "⇖", [ 8662, 0 ] ), + entity( "⤣", [ 10531, 0 ] ), + entity( "↖", [ 8598, 0 ] ), + entity( "↖", [ 8598, 0 ] ), + entity( "⤧", [ 10535, 0 ] ), + entity( "Ⓢ", [ 9416, 0 ] ), + entity( "ó", [ 243, 0 ] ), + entity( "⊛", [ 8859, 0 ] ), + entity( "⊚", [ 8858, 0 ] ), + entity( "ô", [ 244, 0 ] ), + entity( "о", [ 1086, 0 ] ), + entity( "⊝", [ 8861, 0 ] ), + entity( "ő", [ 337, 0 ] ), + entity( "⨸", [ 10808, 0 ] ), + entity( "⊙", [ 8857, 0 ] ), + entity( "⦼", [ 10684, 0 ] ), + entity( "œ", [ 339, 0 ] ), + entity( "⦿", [ 10687, 0 ] ), + entity( "𝔬", [ 120108, 0 ] ), + entity( "˛", [ 731, 0 ] ), + entity( "ò", [ 242, 0 ] ), + entity( "⧁", [ 10689, 0 ] ), + entity( "⦵", [ 10677, 0 ] ), + entity( "Ω", [ 937, 0 ] ), + entity( "∮", [ 8750, 0 ] ), + entity( "↺", [ 8634, 0 ] ), + entity( "⦾", [ 10686, 0 ] ), + entity( "⦻", [ 10683, 0 ] ), + entity( "‾", [ 8254, 0 ] ), + entity( "⧀", [ 10688, 0 ] ), + entity( "ō", [ 333, 0 ] ), + entity( "ω", [ 969, 0 ] ), + entity( "ο", [ 959, 0 ] ), + entity( "⦶", [ 10678, 0 ] ), + entity( "⊖", [ 8854, 0 ] ), + entity( "𝕠", [ 120160, 0 ] ), + entity( "⦷", [ 10679, 0 ] ), + entity( "⦹", [ 10681, 0 ] ), + entity( "⊕", [ 8853, 0 ] ), + entity( "∨", [ 8744, 0 ] ), + 
entity( "↻", [ 8635, 0 ] ), + entity( "⩝", [ 10845, 0 ] ), + entity( "ℴ", [ 8500, 0 ] ), + entity( "ℴ", [ 8500, 0 ] ), + entity( "ª", [ 170, 0 ] ), + entity( "º", [ 186, 0 ] ), + entity( "⊶", [ 8886, 0 ] ), + entity( "⩖", [ 10838, 0 ] ), + entity( "⩗", [ 10839, 0 ] ), + entity( "⩛", [ 10843, 0 ] ), + entity( "ℴ", [ 8500, 0 ] ), + entity( "ø", [ 248, 0 ] ), + entity( "⊘", [ 8856, 0 ] ), + entity( "õ", [ 245, 0 ] ), + entity( "⊗", [ 8855, 0 ] ), + entity( "⨶", [ 10806, 0 ] ), + entity( "ö", [ 246, 0 ] ), + entity( "⌽", [ 9021, 0 ] ), + entity( "∥", [ 8741, 0 ] ), + entity( "¶", [ 182, 0 ] ), + entity( "∥", [ 8741, 0 ] ), + entity( "⫳", [ 10995, 0 ] ), + entity( "⫽", [ 11005, 0 ] ), + entity( "∂", [ 8706, 0 ] ), + entity( "п", [ 1087, 0 ] ), + entity( "%", [ 37, 0 ] ), + entity( ".", [ 46, 0 ] ), + entity( "‰", [ 8240, 0 ] ), + entity( "⊥", [ 8869, 0 ] ), + entity( "‱", [ 8241, 0 ] ), + entity( "𝔭", [ 120109, 0 ] ), + entity( "φ", [ 966, 0 ] ), + entity( "ϕ", [ 981, 0 ] ), + entity( "ℳ", [ 8499, 0 ] ), + entity( "☎", [ 9742, 0 ] ), + entity( "π", [ 960, 0 ] ), + entity( "⋔", [ 8916, 0 ] ), + entity( "ϖ", [ 982, 0 ] ), + entity( "ℏ", [ 8463, 0 ] ), + entity( "ℎ", [ 8462, 0 ] ), + entity( "ℏ", [ 8463, 0 ] ), + entity( "+", [ 43, 0 ] ), + entity( "⨣", [ 10787, 0 ] ), + entity( "⊞", [ 8862, 0 ] ), + entity( "⨢", [ 10786, 0 ] ), + entity( "∔", [ 8724, 0 ] ), + entity( "⨥", [ 10789, 0 ] ), + entity( "⩲", [ 10866, 0 ] ), + entity( "±", [ 177, 0 ] ), + entity( "⨦", [ 10790, 0 ] ), + entity( "⨧", [ 10791, 0 ] ), + entity( "±", [ 177, 0 ] ), + entity( "⨕", [ 10773, 0 ] ), + entity( "𝕡", [ 120161, 0 ] ), + entity( "£", [ 163, 0 ] ), + entity( "≺", [ 8826, 0 ] ), + entity( "⪳", [ 10931, 0 ] ), + entity( "⪷", [ 10935, 0 ] ), + entity( "≼", [ 8828, 0 ] ), + entity( "⪯", [ 10927, 0 ] ), + entity( "≺", [ 8826, 0 ] ), + entity( "⪷", [ 10935, 0 ] ), + entity( "≼", [ 8828, 0 ] ), + entity( "⪯", [ 10927, 0 ] ), + entity( "⪹", [ 10937, 0 ] ), + entity( "⪵", [ 10933, 0 ] ), + entity( "⋨", 
[ 8936, 0 ] ), + entity( "≾", [ 8830, 0 ] ), + entity( "′", [ 8242, 0 ] ), + entity( "ℙ", [ 8473, 0 ] ), + entity( "⪵", [ 10933, 0 ] ), + entity( "⪹", [ 10937, 0 ] ), + entity( "⋨", [ 8936, 0 ] ), + entity( "∏", [ 8719, 0 ] ), + entity( "⌮", [ 9006, 0 ] ), + entity( "⌒", [ 8978, 0 ] ), + entity( "⌓", [ 8979, 0 ] ), + entity( "∝", [ 8733, 0 ] ), + entity( "∝", [ 8733, 0 ] ), + entity( "≾", [ 8830, 0 ] ), + entity( "⊰", [ 8880, 0 ] ), + entity( "𝓅", [ 120005, 0 ] ), + entity( "ψ", [ 968, 0 ] ), + entity( " ", [ 8200, 0 ] ), + entity( "𝔮", [ 120110, 0 ] ), + entity( "⨌", [ 10764, 0 ] ), + entity( "𝕢", [ 120162, 0 ] ), + entity( "⁗", [ 8279, 0 ] ), + entity( "𝓆", [ 120006, 0 ] ), + entity( "ℍ", [ 8461, 0 ] ), + entity( "⨖", [ 10774, 0 ] ), + entity( "?", [ 63, 0 ] ), + entity( "≟", [ 8799, 0 ] ), + entity( """, [ 34, 0 ] ), + entity( "⇛", [ 8667, 0 ] ), + entity( "⇒", [ 8658, 0 ] ), + entity( "⤜", [ 10524, 0 ] ), + entity( "⤏", [ 10511, 0 ] ), + entity( "⥤", [ 10596, 0 ] ), + entity( "∽̱", [ 8765, 817 ] ), + entity( "ŕ", [ 341, 0 ] ), + entity( "√", [ 8730, 0 ] ), + entity( "⦳", [ 10675, 0 ] ), + entity( "⟩", [ 10217, 0 ] ), + entity( "⦒", [ 10642, 0 ] ), + entity( "⦥", [ 10661, 0 ] ), + entity( "⟩", [ 10217, 0 ] ), + entity( "»", [ 187, 0 ] ), + entity( "→", [ 8594, 0 ] ), + entity( "⥵", [ 10613, 0 ] ), + entity( "⇥", [ 8677, 0 ] ), + entity( "⤠", [ 10528, 0 ] ), + entity( "⤳", [ 10547, 0 ] ), + entity( "⤞", [ 10526, 0 ] ), + entity( "↪", [ 8618, 0 ] ), + entity( "↬", [ 8620, 0 ] ), + entity( "⥅", [ 10565, 0 ] ), + entity( "⥴", [ 10612, 0 ] ), + entity( "↣", [ 8611, 0 ] ), + entity( "↝", [ 8605, 0 ] ), + entity( "⤚", [ 10522, 0 ] ), + entity( "∶", [ 8758, 0 ] ), + entity( "ℚ", [ 8474, 0 ] ), + entity( "⤍", [ 10509, 0 ] ), + entity( "❳", [ 10099, 0 ] ), + entity( "}", [ 125, 0 ] ), + entity( "]", [ 93, 0 ] ), + entity( "⦌", [ 10636, 0 ] ), + entity( "⦎", [ 10638, 0 ] ), + entity( "⦐", [ 10640, 0 ] ), + entity( "ř", [ 345, 0 ] ), + entity( "ŗ", [ 343, 0 ] ), + entity( 
"⌉", [ 8969, 0 ] ), + entity( "}", [ 125, 0 ] ), + entity( "р", [ 1088, 0 ] ), + entity( "⤷", [ 10551, 0 ] ), + entity( "⥩", [ 10601, 0 ] ), + entity( "”", [ 8221, 0 ] ), + entity( "”", [ 8221, 0 ] ), + entity( "↳", [ 8627, 0 ] ), + entity( "ℜ", [ 8476, 0 ] ), + entity( "ℛ", [ 8475, 0 ] ), + entity( "ℜ", [ 8476, 0 ] ), + entity( "ℝ", [ 8477, 0 ] ), + entity( "▭", [ 9645, 0 ] ), + entity( "®", [ 174, 0 ] ), + entity( "⥽", [ 10621, 0 ] ), + entity( "⌋", [ 8971, 0 ] ), + entity( "𝔯", [ 120111, 0 ] ), + entity( "⇁", [ 8641, 0 ] ), + entity( "⇀", [ 8640, 0 ] ), + entity( "⥬", [ 10604, 0 ] ), + entity( "ρ", [ 961, 0 ] ), + entity( "ϱ", [ 1009, 0 ] ), + entity( "→", [ 8594, 0 ] ), + entity( "↣", [ 8611, 0 ] ), + entity( "⇁", [ 8641, 0 ] ), + entity( "⇀", [ 8640, 0 ] ), + entity( "⇄", [ 8644, 0 ] ), + entity( "⇌", [ 8652, 0 ] ), + entity( "⇉", [ 8649, 0 ] ), + entity( "↝", [ 8605, 0 ] ), + entity( "⋌", [ 8908, 0 ] ), + entity( "˚", [ 730, 0 ] ), + entity( "≓", [ 8787, 0 ] ), + entity( "⇄", [ 8644, 0 ] ), + entity( "⇌", [ 8652, 0 ] ), + entity( "‏", [ 8207, 0 ] ), + entity( "⎱", [ 9137, 0 ] ), + entity( "⎱", [ 9137, 0 ] ), + entity( "⫮", [ 10990, 0 ] ), + entity( "⟭", [ 10221, 0 ] ), + entity( "⇾", [ 8702, 0 ] ), + entity( "⟧", [ 10215, 0 ] ), + entity( "⦆", [ 10630, 0 ] ), + entity( "𝕣", [ 120163, 0 ] ), + entity( "⨮", [ 10798, 0 ] ), + entity( "⨵", [ 10805, 0 ] ), + entity( ")", [ 41, 0 ] ), + entity( "⦔", [ 10644, 0 ] ), + entity( "⨒", [ 10770, 0 ] ), + entity( "⇉", [ 8649, 0 ] ), + entity( "›", [ 8250, 0 ] ), + entity( "𝓇", [ 120007, 0 ] ), + entity( "↱", [ 8625, 0 ] ), + entity( "]", [ 93, 0 ] ), + entity( "’", [ 8217, 0 ] ), + entity( "’", [ 8217, 0 ] ), + entity( "⋌", [ 8908, 0 ] ), + entity( "⋊", [ 8906, 0 ] ), + entity( "▹", [ 9657, 0 ] ), + entity( "⊵", [ 8885, 0 ] ), + entity( "▸", [ 9656, 0 ] ), + entity( "⧎", [ 10702, 0 ] ), + entity( "⥨", [ 10600, 0 ] ), + entity( "℞", [ 8478, 0 ] ), + entity( "ś", [ 347, 0 ] ), + entity( "‚", [ 8218, 0 ] ), + entity( "≻", [ 
8827, 0 ] ), + entity( "⪴", [ 10932, 0 ] ), + entity( "⪸", [ 10936, 0 ] ), + entity( "š", [ 353, 0 ] ), + entity( "≽", [ 8829, 0 ] ), + entity( "⪰", [ 10928, 0 ] ), + entity( "ş", [ 351, 0 ] ), + entity( "ŝ", [ 349, 0 ] ), + entity( "⪶", [ 10934, 0 ] ), + entity( "⪺", [ 10938, 0 ] ), + entity( "⋩", [ 8937, 0 ] ), + entity( "⨓", [ 10771, 0 ] ), + entity( "≿", [ 8831, 0 ] ), + entity( "с", [ 1089, 0 ] ), + entity( "⋅", [ 8901, 0 ] ), + entity( "⊡", [ 8865, 0 ] ), + entity( "⩦", [ 10854, 0 ] ), + entity( "⇘", [ 8664, 0 ] ), + entity( "⤥", [ 10533, 0 ] ), + entity( "↘", [ 8600, 0 ] ), + entity( "↘", [ 8600, 0 ] ), + entity( "§", [ 167, 0 ] ), + entity( ";", [ 59, 0 ] ), + entity( "⤩", [ 10537, 0 ] ), + entity( "∖", [ 8726, 0 ] ), + entity( "∖", [ 8726, 0 ] ), + entity( "✶", [ 10038, 0 ] ), + entity( "𝔰", [ 120112, 0 ] ), + entity( "⌢", [ 8994, 0 ] ), + entity( "♯", [ 9839, 0 ] ), + entity( "щ", [ 1097, 0 ] ), + entity( "ш", [ 1096, 0 ] ), + entity( "∣", [ 8739, 0 ] ), + entity( "∥", [ 8741, 0 ] ), + entity( "­", [ 173, 0 ] ), + entity( "σ", [ 963, 0 ] ), + entity( "ς", [ 962, 0 ] ), + entity( "ς", [ 962, 0 ] ), + entity( "∼", [ 8764, 0 ] ), + entity( "⩪", [ 10858, 0 ] ), + entity( "≃", [ 8771, 0 ] ), + entity( "≃", [ 8771, 0 ] ), + entity( "⪞", [ 10910, 0 ] ), + entity( "⪠", [ 10912, 0 ] ), + entity( "⪝", [ 10909, 0 ] ), + entity( "⪟", [ 10911, 0 ] ), + entity( "≆", [ 8774, 0 ] ), + entity( "⨤", [ 10788, 0 ] ), + entity( "⥲", [ 10610, 0 ] ), + entity( "←", [ 8592, 0 ] ), + entity( "∖", [ 8726, 0 ] ), + entity( "⨳", [ 10803, 0 ] ), + entity( "⧤", [ 10724, 0 ] ), + entity( "∣", [ 8739, 0 ] ), + entity( "⌣", [ 8995, 0 ] ), + entity( "⪪", [ 10922, 0 ] ), + entity( "⪬", [ 10924, 0 ] ), + entity( "⪬︀", [ 10924, 65024 ] ), + entity( "ь", [ 1100, 0 ] ), + entity( "/", [ 47, 0 ] ), + entity( "⧄", [ 10692, 0 ] ), + entity( "⌿", [ 9023, 0 ] ), + entity( "𝕤", [ 120164, 0 ] ), + entity( "♠", [ 9824, 0 ] ), + entity( "♠", [ 9824, 0 ] ), + entity( "∥", [ 8741, 0 ] ), + entity( "⊓", [ 
8851, 0 ] ), + entity( "⊓︀", [ 8851, 65024 ] ), + entity( "⊔", [ 8852, 0 ] ), + entity( "⊔︀", [ 8852, 65024 ] ), + entity( "⊏", [ 8847, 0 ] ), + entity( "⊑", [ 8849, 0 ] ), + entity( "⊏", [ 8847, 0 ] ), + entity( "⊑", [ 8849, 0 ] ), + entity( "⊐", [ 8848, 0 ] ), + entity( "⊒", [ 8850, 0 ] ), + entity( "⊐", [ 8848, 0 ] ), + entity( "⊒", [ 8850, 0 ] ), + entity( "□", [ 9633, 0 ] ), + entity( "□", [ 9633, 0 ] ), + entity( "▪", [ 9642, 0 ] ), + entity( "▪", [ 9642, 0 ] ), + entity( "→", [ 8594, 0 ] ), + entity( "𝓈", [ 120008, 0 ] ), + entity( "∖", [ 8726, 0 ] ), + entity( "⌣", [ 8995, 0 ] ), + entity( "⋆", [ 8902, 0 ] ), + entity( "☆", [ 9734, 0 ] ), + entity( "★", [ 9733, 0 ] ), + entity( "ϵ", [ 1013, 0 ] ), + entity( "ϕ", [ 981, 0 ] ), + entity( "¯", [ 175, 0 ] ), + entity( "⊂", [ 8834, 0 ] ), + entity( "⫅", [ 10949, 0 ] ), + entity( "⪽", [ 10941, 0 ] ), + entity( "⊆", [ 8838, 0 ] ), + entity( "⫃", [ 10947, 0 ] ), + entity( "⫁", [ 10945, 0 ] ), + entity( "⫋", [ 10955, 0 ] ), + entity( "⊊", [ 8842, 0 ] ), + entity( "⪿", [ 10943, 0 ] ), + entity( "⥹", [ 10617, 0 ] ), + entity( "⊂", [ 8834, 0 ] ), + entity( "⊆", [ 8838, 0 ] ), + entity( "⫅", [ 10949, 0 ] ), + entity( "⊊", [ 8842, 0 ] ), + entity( "⫋", [ 10955, 0 ] ), + entity( "⫇", [ 10951, 0 ] ), + entity( "⫕", [ 10965, 0 ] ), + entity( "⫓", [ 10963, 0 ] ), + entity( "≻", [ 8827, 0 ] ), + entity( "⪸", [ 10936, 0 ] ), + entity( "≽", [ 8829, 0 ] ), + entity( "⪰", [ 10928, 0 ] ), + entity( "⪺", [ 10938, 0 ] ), + entity( "⪶", [ 10934, 0 ] ), + entity( "⋩", [ 8937, 0 ] ), + entity( "≿", [ 8831, 0 ] ), + entity( "∑", [ 8721, 0 ] ), + entity( "♪", [ 9834, 0 ] ), + entity( "¹", [185, 0 ] ), + entity( "¹", [ 185, 0 ] ), + entity( "²", [178, 0 ] ), + entity( "²", [ 178, 0 ] ), + entity( "³", [179, 0 ] ), + entity( "³", [ 179, 0 ] ), + entity( "⊃", [ 8835, 0 ] ), + entity( "⫆", [ 10950, 0 ] ), + entity( "⪾", [ 10942, 0 ] ), + entity( "⫘", [ 10968, 0 ] ), + entity( "⊇", [ 8839, 0 ] ), + entity( "⫄", [ 10948, 0 ] ), + entity( "⟉", 
[ 10185, 0 ] ), + entity( "⫗", [ 10967, 0 ] ), + entity( "⥻", [ 10619, 0 ] ), + entity( "⫂", [ 10946, 0 ] ), + entity( "⫌", [ 10956, 0 ] ), + entity( "⊋", [ 8843, 0 ] ), + entity( "⫀", [ 10944, 0 ] ), + entity( "⊃", [ 8835, 0 ] ), + entity( "⊇", [ 8839, 0 ] ), + entity( "⫆", [ 10950, 0 ] ), + entity( "⊋", [ 8843, 0 ] ), + entity( "⫌", [ 10956, 0 ] ), + entity( "⫈", [ 10952, 0 ] ), + entity( "⫔", [ 10964, 0 ] ), + entity( "⫖", [ 10966, 0 ] ), + entity( "⇙", [ 8665, 0 ] ), + entity( "⤦", [ 10534, 0 ] ), + entity( "↙", [ 8601, 0 ] ), + entity( "↙", [ 8601, 0 ] ), + entity( "⤪", [ 10538, 0 ] ), + entity( "ß", [ 223, 0 ] ), + entity( "⌖", [ 8982, 0 ] ), + entity( "τ", [ 964, 0 ] ), + entity( "⎴", [ 9140, 0 ] ), + entity( "ť", [ 357, 0 ] ), + entity( "ţ", [ 355, 0 ] ), + entity( "т", [ 1090, 0 ] ), + entity( "⃛", [ 8411, 0 ] ), + entity( "⌕", [ 8981, 0 ] ), + entity( "𝔱", [ 120113, 0 ] ), + entity( "∴", [ 8756, 0 ] ), + entity( "∴", [ 8756, 0 ] ), + entity( "θ", [ 952, 0 ] ), + entity( "ϑ", [ 977, 0 ] ), + entity( "ϑ", [ 977, 0 ] ), + entity( "≈", [ 8776, 0 ] ), + entity( "∼", [ 8764, 0 ] ), + entity( " ", [ 8201, 0 ] ), + entity( "≈", [ 8776, 0 ] ), + entity( "∼", [ 8764, 0 ] ), + entity( "þ", [ 254, 0 ] ), + entity( "˜", [ 732, 0 ] ), + entity( "×", [ 215, 0 ] ), + entity( "⊠", [ 8864, 0 ] ), + entity( "⨱", [ 10801, 0 ] ), + entity( "⨰", [ 10800, 0 ] ), + entity( "∭", [ 8749, 0 ] ), + entity( "⤨", [ 10536, 0 ] ), + entity( "⊤", [ 8868, 0 ] ), + entity( "⌶", [ 9014, 0 ] ), + entity( "⫱", [ 10993, 0 ] ), + entity( "𝕥", [ 120165, 0 ] ), + entity( "⫚", [ 10970, 0 ] ), + entity( "⤩", [ 10537, 0 ] ), + entity( "‴", [ 8244, 0 ] ), + entity( "™", [ 8482, 0 ] ), + entity( "▵", [ 9653, 0 ] ), + entity( "▿", [ 9663, 0 ] ), + entity( "◃", [ 9667, 0 ] ), + entity( "⊴", [ 8884, 0 ] ), + entity( "≜", [ 8796, 0 ] ), + entity( "▹", [ 9657, 0 ] ), + entity( "⊵", [ 8885, 0 ] ), + entity( "◬", [ 9708, 0 ] ), + entity( "≜", [ 8796, 0 ] ), + entity( "⨺", [ 10810, 0 ] ), + entity( "⨹", [ 
10809, 0 ] ), + entity( "⧍", [ 10701, 0 ] ), + entity( "⨻", [ 10811, 0 ] ), + entity( "⏢", [ 9186, 0 ] ), + entity( "𝓉", [ 120009, 0 ] ), + entity( "ц", [ 1094, 0 ] ), + entity( "ћ", [ 1115, 0 ] ), + entity( "ŧ", [ 359, 0 ] ), + entity( "≬", [ 8812, 0 ] ), + entity( "↞", [ 8606, 0 ] ), + entity( "↠", [ 8608, 0 ] ), + entity( "⇑", [ 8657, 0 ] ), + entity( "⥣", [ 10595, 0 ] ), + entity( "ú", [ 250, 0 ] ), + entity( "↑", [ 8593, 0 ] ), + entity( "ў", [ 1118, 0 ] ), + entity( "ŭ", [ 365, 0 ] ), + entity( "û", [ 251, 0 ] ), + entity( "у", [ 1091, 0 ] ), + entity( "⇅", [ 8645, 0 ] ), + entity( "ű", [ 369, 0 ] ), + entity( "⥮", [ 10606, 0 ] ), + entity( "⥾", [ 10622, 0 ] ), + entity( "𝔲", [ 120114, 0 ] ), + entity( "ù", [ 249, 0 ] ), + entity( "↿", [ 8639, 0 ] ), + entity( "↾", [ 8638, 0 ] ), + entity( "▀", [ 9600, 0 ] ), + entity( "⌜", [ 8988, 0 ] ), + entity( "⌜", [ 8988, 0 ] ), + entity( "⌏", [ 8975, 0 ] ), + entity( "◸", [ 9720, 0 ] ), + entity( "ū", [ 363, 0 ] ), + entity( "¨", [ 168, 0 ] ), + entity( "ų", [ 371, 0 ] ), + entity( "𝕦", [ 120166, 0 ] ), + entity( "↑", [ 8593, 0 ] ), + entity( "↕", [ 8597, 0 ] ), + entity( "↿", [ 8639, 0 ] ), + entity( "↾", [ 8638, 0 ] ), + entity( "⊎", [ 8846, 0 ] ), + entity( "υ", [ 965, 0 ] ), + entity( "ϒ", [ 978, 0 ] ), + entity( "υ", [ 965, 0 ] ), + entity( "⇈", [ 8648, 0 ] ), + entity( "⌝", [ 8989, 0 ] ), + entity( "⌝", [ 8989, 0 ] ), + entity( "⌎", [ 8974, 0 ] ), + entity( "ů", [ 367, 0 ] ), + entity( "◹", [ 9721, 0 ] ), + entity( "𝓊", [ 120010, 0 ] ), + entity( "⋰", [ 8944, 0 ] ), + entity( "ũ", [ 361, 0 ] ), + entity( "▵", [ 9653, 0 ] ), + entity( "▴", [ 9652, 0 ] ), + entity( "⇈", [ 8648, 0 ] ), + entity( "ü", [ 252, 0 ] ), + entity( "⦧", [ 10663, 0 ] ), + entity( "⇕", [ 8661, 0 ] ), + entity( "⫨", [ 10984, 0 ] ), + entity( "⫩", [ 10985, 0 ] ), + entity( "⊨", [ 8872, 0 ] ), + entity( "⦜", [ 10652, 0 ] ), + entity( "ϵ", [ 1013, 0 ] ), + entity( "ϰ", [ 1008, 0 ] ), + entity( "∅", [ 8709, 0 ] ), + entity( "ϕ", [ 981, 0 ] ), + 
entity( "ϖ", [ 982, 0 ] ), + entity( "∝", [ 8733, 0 ] ), + entity( "↕", [ 8597, 0 ] ), + entity( "ϱ", [ 1009, 0 ] ), + entity( "ς", [ 962, 0 ] ), + entity( "⊊︀", [ 8842, 65024 ] ), + entity( "⫋︀", [ 10955, 65024 ] ), + entity( "⊋︀", [ 8843, 65024 ] ), + entity( "⫌︀", [ 10956, 65024 ] ), + entity( "ϑ", [ 977, 0 ] ), + entity( "⊲", [ 8882, 0 ] ), + entity( "⊳", [ 8883, 0 ] ), + entity( "в", [ 1074, 0 ] ), + entity( "⊢", [ 8866, 0 ] ), + entity( "∨", [ 8744, 0 ] ), + entity( "⊻", [ 8891, 0 ] ), + entity( "≚", [ 8794, 0 ] ), + entity( "⋮", [ 8942, 0 ] ), + entity( "|", [ 124, 0 ] ), + entity( "|", [ 124, 0 ] ), + entity( "𝔳", [ 120115, 0 ] ), + entity( "⊲", [ 8882, 0 ] ), + entity( "⊂⃒", [ 8834, 8402 ] ), + entity( "⊃⃒", [ 8835, 8402 ] ), + entity( "𝕧", [ 120167, 0 ] ), + entity( "∝", [ 8733, 0 ] ), + entity( "⊳", [ 8883, 0 ] ), + entity( "𝓋", [ 120011, 0 ] ), + entity( "⫋︀", [ 10955, 65024 ] ), + entity( "⊊︀", [ 8842, 65024 ] ), + entity( "⫌︀", [ 10956, 65024 ] ), + entity( "⊋︀", [ 8843, 65024 ] ), + entity( "⦚", [ 10650, 0 ] ), + entity( "ŵ", [ 373, 0 ] ), + entity( "⩟", [ 10847, 0 ] ), + entity( "∧", [ 8743, 0 ] ), + entity( "≙", [ 8793, 0 ] ), + entity( "℘", [ 8472, 0 ] ), + entity( "𝔴", [ 120116, 0 ] ), + entity( "𝕨", [ 120168, 0 ] ), + entity( "℘", [ 8472, 0 ] ), + entity( "≀", [ 8768, 0 ] ), + entity( "≀", [ 8768, 0 ] ), + entity( "𝓌", [ 120012, 0 ] ), + entity( "⋂", [ 8898, 0 ] ), + entity( "◯", [ 9711, 0 ] ), + entity( "⋃", [ 8899, 0 ] ), + entity( "▽", [ 9661, 0 ] ), + entity( "𝔵", [ 120117, 0 ] ), + entity( "⟺", [ 10234, 0 ] ), + entity( "⟷", [ 10231, 0 ] ), + entity( "ξ", [ 958, 0 ] ), + entity( "⟸", [ 10232, 0 ] ), + entity( "⟵", [ 10229, 0 ] ), + entity( "⟼", [ 10236, 0 ] ), + entity( "⋻", [ 8955, 0 ] ), + entity( "⨀", [ 10752, 0 ] ), + entity( "𝕩", [ 120169, 0 ] ), + entity( "⨁", [ 10753, 0 ] ), + entity( "⨂", [ 10754, 0 ] ), + entity( "⟹", [ 10233, 0 ] ), + entity( "⟶", [ 10230, 0 ] ), + entity( "𝓍", [ 120013, 0 ] ), + entity( "⨆", [ 10758, 0 ] ), + 
entity( "⨄", [ 10756, 0 ] ), + entity( "△", [ 9651, 0 ] ), + entity( "⋁", [ 8897, 0 ] ), + entity( "⋀", [ 8896, 0 ] ), + entity( "ý", [ 253, 0 ] ), + entity( "я", [ 1103, 0 ] ), + entity( "ŷ", [ 375, 0 ] ), + entity( "ы", [ 1099, 0 ] ), + entity( "¥", [ 165, 0 ] ), + entity( "𝔶", [ 120118, 0 ] ), + entity( "ї", [ 1111, 0 ] ), + entity( "𝕪", [ 120170, 0 ] ), + entity( "𝓎", [ 120014, 0 ] ), + entity( "ю", [ 1102, 0 ] ), + entity( "ÿ", [ 255, 0 ] ), + entity( "ź", [ 378, 0 ] ), + entity( "ž", [ 382, 0 ] ), + entity( "з", [ 1079, 0 ] ), + entity( "ż", [ 380, 0 ] ), + entity( "ℨ", [ 8488, 0 ] ), + entity( "ζ", [ 950, 0 ] ), + entity( "𝔷", [ 120119, 0 ] ), + entity( "ж", [ 1078, 0 ] ), + entity( "⇝", [ 8669, 0 ] ), + entity( "𝕫", [ 120171, 0 ] ), + entity( "𝓏", [ 120015, 0 ] ), + entity( "‍", [ 8205, 0 ] ), + entity( "‌", [ 8204, 0 ] ), +]; + + +struct entity_key +{ + const(char)* name; + size_t name_size; +} + +extern(C) int entity_cmp(scope const(void)* p_key, scope const(void)* p_entity) +{ + entity_key* key = cast(entity_key*) p_key; + entity* ent = cast(entity*) p_entity; + return strncmp(key.name, ent.name, key.name_size); +} + +const(entity)* entity_lookup(const(char)* name, size_t name_size) +{ + entity_key key = entity_key(name, name_size); + const(void)* result = bsearch(&key, cast(const(void)*)entity_table.ptr, entity_table.length, entity.sizeof, &entity_cmp); + return cast(const(entity)*)result; +} + +// +// HTML RENDERING +// + +/* If set, debug output from md_parse() is sent to stderr. 
*/ +enum MD_RENDER_FLAG_DEBUG = 0x0001; + +enum MD_RENDER_FLAG_VERBATIM_ENTITIES = 0x0002; + + +struct MD_RENDER_HTML +{ + void function(const(MD_CHAR)*, MD_SIZE, void*) nothrow @nogc process_output; + void* userdata; + uint flags; + int image_nesting_level; + char[256] escape_map; +} + + +/***************************************** + *** HTML rendering helper functions *** + *****************************************/ + +/* +#define ISDIGIT(ch) +#define ISLOWER(ch) +#define ISUPPER(ch) +*/ +bool ISALNUM_HTML(CHAR ch) +{ + return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9'); +} + +void render_text(MD_RENDER_HTML* r, const(MD_CHAR)* text, MD_SIZE size) +{ + r.process_output(text, size, r.userdata); +} + +void RENDER_LITERAL(MD_RENDER_HTML* r, const(MD_CHAR)* literal) +{ + render_text(r, literal, cast(uint) strlen(literal)); +} + +/* Some characters need to be escaped in normal HTML text. */ +bool HTML_NEED_ESCAPE(MD_RENDER_HTML* r, CHAR ch) +{ + return (r.escape_map[cast(ubyte)(ch)] != 0); +} + +void render_html_escaped(MD_RENDER_HTML* r, const MD_CHAR* data, MD_SIZE size) +{ + MD_OFFSET beg = 0; + MD_OFFSET off = 0; + + while(1) { + /* Optimization: Use some loop unrolling. 
*/ + while(off + 3 < size && !HTML_NEED_ESCAPE(r, data[off+0]) && !HTML_NEED_ESCAPE(r, data[off+1]) + && !HTML_NEED_ESCAPE(r, data[off+2]) && !HTML_NEED_ESCAPE(r, data[off+3])) + off += 4; + while(off < size && !HTML_NEED_ESCAPE(r, data[off])) + off++; + + if(off > beg) + render_text(r, data + beg, off - beg); + + if(off < size) { + switch(data[off]) { + case '&': RENDER_LITERAL(r, "&"); break; + case '<': RENDER_LITERAL(r, "<"); break; + case '>': RENDER_LITERAL(r, ">"); break; + case '"': RENDER_LITERAL(r, """); break; + default: break; + } + off++; + } else { + break; + } + beg = off; + } +} + + +/* Reports whether ch has to be escaped inside a URL: true when ISALNUM_HTML(ch) is false and strchr does not find ch in the literal set below. NOTE(review): if strchr is the C library routine it also matches the terminating NUL of the set, so ch == 0 would be reported as not needing an escape - confirm that this is intended. */ bool URL_NEED_ESCAPE(CHAR ch) +{ + return (!ISALNUM_HTML(ch) && strchr("-_.+!*'(),%#@?=;:/,+$", ch) == null); +} + +/* Writes data[0 .. size) for use as a URL. Runs of bytes for which URL_NEED_ESCAPE is false are passed to render_text unchanged; every other byte is handled one at a time: the ampersand and apostrophe cases emit a fixed literal, anything else is emitted as a percent sign followed by two uppercase hex digits. NOTE(review): the two fixed literals read as the raw characters in this view; presumably they were entity strings before extraction - verify against upstream. */ void render_url_escaped(MD_RENDER_HTML* r, const MD_CHAR* data, MD_SIZE size) +{ + static immutable(MD_CHAR)[] hex_chars = "0123456789ABCDEF"; + MD_OFFSET beg = 0; + MD_OFFSET off = 0; + + while(1) { + /* Advance over the longest run that needs no escaping. */ while(off < size && !URL_NEED_ESCAPE(data[off])) + off++; + if(off > beg) + render_text(r, data + beg, off - beg); + + if(off < size) { + char[3] hex; + + switch(data[off]) { + case '&': RENDER_LITERAL(r, "&"); break; + case '\'': RENDER_LITERAL(r, "'"); break; + default: + hex[0] = '%'; + /* high nibble, then low nibble */ hex[1] = hex_chars[(cast(uint)data[off] >> 4) & 0xf]; + hex[2] = hex_chars[(cast(uint)data[off] >> 0) & 0xf]; + render_text(r, hex.ptr, 3); + break; + } + off++; + } else { + break; + } + + beg = off; + } +} + +/* Converts one hex digit character to its numeric value. The digits 0-9 map to 0..9 and the letters A-Z map to 10 upward; every other input is treated as a lowercase letter (ch - a + 10). Nothing is validated, so callers presumably pass only characters already known to be hex digits. */ uint hex_val(char ch) +{ + if('0' <= ch && ch <= '9') + return ch - '0'; + if('A' <= ch && ch <= 'Z') + return ch - 'A' + 10; + else + return ch - 'a' + 10; +} + +/* Signature of the output sinks handed to the render helpers below: renderer, pointer to the text, length of the text. */ alias appendFunc = nothrow @nogc void function(MD_RENDER_HTML*, const(MD_CHAR)*, MD_SIZE); + +/* Encodes codepoint as UTF-8 (1 to 4 bytes, chosen by magnitude) and passes the bytes to fn_append. A codepoint of 0 or above 0x10ffff is not emitted; the bytes EF BF BD (U+FFFD, the replacement character) are sent instead. The surrogate range 0xD800-0xDFFF gets no special treatment here. */ void render_utf8_codepoint(MD_RENDER_HTML* r, uint codepoint, + appendFunc fn_append) +{ + static immutable(MD_CHAR)[] utf8_replacement_char = [ 0xef, 0xbf, 0xbd ]; + + char[4] utf8; + size_t n; + + if(codepoint <= 0x7f) { + n = 1; + utf8[0] = cast(ubyte) codepoint; + } else if(codepoint <= 0x7ff) { + n = 2; + 
utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f); + utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f); + } else if(codepoint <= 0xffff) { + n = 3; + utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf); + utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f); + utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f); + } else { + n = 4; + utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7); + utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f); + utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f); + utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f); + } + + /* The bytes above are always computed; only an in-range, non-zero codepoint is actually emitted. */ if(0 < codepoint && codepoint <= 0x10ffff) + fn_append(r, utf8.ptr, cast(uint)n); + else + fn_append(r, utf8_replacement_char.ptr, 3); +} + +/* Translate entity to its UTF-8 equivalent, or output the verbatim one + * if such entity is unknown (or if the translation is disabled). + * + * text and size cover the whole entity. The numeric loops stop at size-1, so the last character (presumably the terminating semicolon) is never read as a digit. All output goes through fn_append. */ +void render_entity(MD_RENDER_HTML* r, const(MD_CHAR)* text, MD_SIZE size, + appendFunc fn_append) +{ + if(r.flags & MD_RENDER_FLAG_VERBATIM_ENTITIES) { + fn_append(r, text, size); + return; + } + + /* We assume UTF-8 output is what is desired. */ + if(size > 3 && text[1] == '#') { + uint codepoint = 0; + + if(text[2] == 'x' || text[2] == 'X') { + /* Hexadecimal form: the digits start at index 3, after the x or X. */ + MD_SIZE i; + for(i = 3; i < size-1; i++) + codepoint = 16 * codepoint + hex_val(text[i]); + } else { + /* Decimal form: the digits start at index 2, after the hash sign. */ + MD_SIZE i; + for(i = 2; i < size-1; i++) + codepoint = 10 * codepoint + (text[i] - '0'); + } + + render_utf8_codepoint(r, codepoint, fn_append); + return; + } else { + /* Named entity: resolved through entity_lookup; the first codepoint is always emitted and the second only when it is non-zero. 
 */ + const(entity)* ent; + + ent = entity_lookup(text, size); + if(ent != null) { + render_utf8_codepoint(r, ent.codepoints[0], fn_append); + if(ent.codepoints[1]) + render_utf8_codepoint(r, ent.codepoints[1], fn_append); + return; + } + } + + /* Lookup failed: pass the entity text through unchanged. */ fn_append(r, text, size); +} + +/* Renders attr piece by piece. Substring i spans substr_offsets[i] up to substr_offsets[i+1]; the loop ends at the first offset that is not below attr.size. Entity substrings go through render_entity; a MD_TEXT_NULLCHAR substring is rendered as codepoint 0 through render_text rather than fn_append, which render_utf8_codepoint turns into the replacement character; everything else is handed to fn_append as is. */ void render_attribute(MD_RENDER_HTML* r, const MD_ATTRIBUTE* attr, + appendFunc fn_append) +{ + int i; + + for(i = 0; attr.substr_offsets[i] < attr.size; i++) { + MD_TEXTTYPE type = attr.substr_types[i]; + MD_OFFSET off = attr.substr_offsets[i]; + MD_SIZE size = attr.substr_offsets[i+1] - off; + const MD_CHAR* text = attr.text + off; + + switch(type) { + case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, &render_text); break; + case MD_TEXT_ENTITY: render_entity(r, text, size, fn_append); break; + default: fn_append(r, text, size); break; + } + } +} + + +/* Opens an ordered list. When det.start is 1 a fixed literal is emitted; otherwise the text is formatted with det.start into a 64-byte stack buffer by snprintf and then emitted. NOTE(review): the markup inside both string literals is not visible in this view (it appears to have been lost in extraction) - restore it from upstream before relying on this code. */ void render_open_ol_block(MD_RENDER_HTML* r, const(MD_BLOCK_OL_DETAIL)* det) +{ + char[64] buf; + + if(det.start == 1) { + RENDER_LITERAL(r, "
      \n"); + return; + } + + snprintf(buf.ptr, buf.length, "
        \n", det.start); + RENDER_LITERAL(r, buf.ptr); +} + +/* Opens a list item; det.is_task selects between the task-item literal and the plain one. NOTE(review): the literal contents are not visible in this view - confirm against upstream. */ void render_open_li_block(MD_RENDER_HTML* r, const(MD_BLOCK_LI_DETAIL)* det) +{ + if(det.is_task) { + RENDER_LITERAL(r, "
      1. " ~ + ""); + } else { + RENDER_LITERAL(r, "
      2. "); + } +} + +/* Opens a code block by emitting a literal. NOTE(review): the literal text is not visible in this view and det is not referenced by what remains - confirm against upstream. */ void render_open_code_block(MD_RENDER_HTML* r, const(MD_BLOCK_CODE_DETAIL)* det) +{ + RENDER_LITERAL(r, "
        ");
        +}
        +
        +/* Write the opening tag of a table cell.
        + *
        + * cell_type is emitted directly after the '<' and therefore supplies the
        + * element name. det.align_ decides whether an align attribute (left,
        + * center or right) is written before the closing '>'; any other alignment
        + * value yields a tag without attributes. */
        +void render_open_td_block(MD_RENDER_HTML* r, const(MD_CHAR)* cell_type, const(MD_BLOCK_TD_DETAIL)* det)
        +{
        +    RENDER_LITERAL(r, "<");
        +    RENDER_LITERAL(r, cell_type);
        +
        +    /* Exactly one of the branches below finishes the tag. */
        +    if(det.align_ == MD_ALIGN_LEFT)
        +        RENDER_LITERAL(r, " align=\"left\">");
        +    else if(det.align_ == MD_ALIGN_CENTER)
        +        RENDER_LITERAL(r, " align=\"center\">");
        +    else if(det.align_ == MD_ALIGN_RIGHT)
        +        RENDER_LITERAL(r, " align=\"right\">");
        +    else
        +        RENDER_LITERAL(r, ">");
        +}
        +
        +/* Opens a link span.
        + * NOTE(review): the only statement visible here emits an empty literal and
        + * det is never read; the anchor markup and attribute output presumably
        + * were lost from this view - compare against upstream before relying on
        + * this body.
        + */void render_open_a_span(MD_RENDER_HTML* r, const(MD_SPAN_A_DETAIL)* det)
        +{
        +    RENDER_LITERAL(r, "");
        +}
        +
        +/* Opens an image span.
        + * NOTE(review): as shown, the RENDER_LITERAL line contains an unbalanced
        + * string literal, det is never read, and the body ends by decrementing
        + * r.image_nesting_level. Text between the opening literal and that
        + * decrement appears to be missing from this view - compare against
        + * upstream before relying on this body.
        + */void render_open_img_span(MD_RENDER_HTML* r, const(MD_SPAN_IMG_DETAIL)* det)
        +{
        +    RENDER_LITERAL(r, "\"");");
        +    r.image_nesting_level--;
        +}
        +
        +
        +/**************************************
        + ***  HTML renderer implementation  ***
        + **************************************/
        +
        +int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
        +{
        +    static immutable(MD_CHAR)*[6] head = [ "

        ", "

        ", "

        ", "

        ", "

        ", "
        " ]; + MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata; + + switch(type) + { + case MD_BLOCK_DOC: /* noop */ break; + case MD_BLOCK_QUOTE: RENDER_LITERAL(r, "
        \n"); break; + case MD_BLOCK_UL: RENDER_LITERAL(r, "