From cf58dd17ef427eed0e82c539f298484ff72d7cef Mon Sep 17 00:00:00 2001 From: Gregory Moskaliuk Date: Tue, 9 Jun 2026 13:19:25 +0200 Subject: [PATCH] chore: sync md4c upstream from mity/md4c master Brings in latest parser changes including footnotes, superscript/subscript spans, and admonitions. Co-authored-by: Cursor --- cpp/md4c/md4c.c | 1713 ++++++++++++++++++++++++++++++++--------------- cpp/md4c/md4c.h | 85 ++- 2 files changed, 1230 insertions(+), 568 deletions(-) diff --git a/cpp/md4c/md4c.c b/cpp/md4c/md4c.c index 86c48caa..394b69f6 100644 --- a/cpp/md4c/md4c.c +++ b/cpp/md4c/md4c.c @@ -1,6 +1,6 @@ /* * MD4C: Markdown parser for C - * (http://github.com/mity/md4c) + * (https://github.com/mity/md4c) * * Copyright (c) 2016-2026 Martin Mitáš * @@ -23,14 +23,15 @@ * IN THE SOFTWARE. */ -#include "md4c.h" - #include #include #include #include +#include #include +#include "md4c.h" + /***************************** *** Miscellaneous Stuff *** @@ -128,12 +129,6 @@ * resolving would be then O(n^2). */ #define CODESPAN_MARK_MAXLEN 32 -/* We limit column count of tables to prevent quadratic explosion of output - * from pathological input of a table thousands of columns and thousands - * of rows where rows are requested with as little as single character - * per-line, relying on us to "helpfully" fill all the missing "". */ -#define TABLE_MAXCOLCOUNT 128 - /************************ *** Internal Types *** @@ -148,6 +143,35 @@ typedef struct MD_MARK_tag MD_MARK; typedef struct MD_BLOCK_tag MD_BLOCK; typedef struct MD_CONTAINER_tag MD_CONTAINER; typedef struct MD_REF_DEF_tag MD_REF_DEF; +typedef struct MD_FOOTNOTE_DEF_tag MD_FOOTNOTE_DEF; + +/* Forward declaration; full definition is below with MD_LINE_tag. */ +typedef struct MD_LINE_tag MD_LINE; + +typedef struct MD_LABEL_HASH_ENTRY_tag MD_LABEL_HASH_ENTRY; +struct MD_LABEL_HASH_ENTRY_tag { + const CHAR* label; + SZ label_size; + unsigned hash; +}; + +typedef struct MD_LABEL_HASH_TABLE_tag MD_LABEL_HASH_TABLE; +struct MD_LABEL_HASH_TABLE_tag { + /* Flat array of all records. */ + union { + void* defs; + MD_REF_DEF* ref_defs; + MD_FOOTNOTE_DEF* footnote_defs; + }; + unsigned def_size; + unsigned n_defs; + unsigned alloc_defs; + + /* The hashtable itself, where the actual entries are pointers into the + * array defs[] above. */ + void** buckets; + unsigned n_buckets; +}; /* During analyzes of inline marks, we need to manage stacks of unresolved @@ -176,13 +200,13 @@ struct MD_CTX_tag { unsigned alloc_buffer; /* Reference definitions. */ - MD_REF_DEF* ref_defs; - int n_ref_defs; - int alloc_ref_defs; - void** ref_def_hashtable; - int ref_def_hashtable_size; + MD_LABEL_HASH_TABLE ref_def_hashtable; SZ max_ref_def_output; + /* Footnote definitions. */ + MD_LABEL_HASH_TABLE footnote_hashtable; + unsigned next_footnote_index; /* 1-based counter for sequential numbering */ + /* Stack of inline/span markers. * This is only used for parsing a single block contents but by storing it * here we may reuse the stack for subsequent blocks; i.e. we have fewer @@ -198,7 +222,7 @@ struct MD_CTX_tag { #endif /* For resolving of inline spans. */ - MD_MARKSTACK opener_stacks[18]; + MD_MARKSTACK opener_stacks[19]; #define ASTERISK_OPENERS_oo_mod3_0 (ctx->opener_stacks[0]) /* Opener-only */ #define ASTERISK_OPENERS_oo_mod3_1 (ctx->opener_stacks[1]) #define ASTERISK_OPENERS_oo_mod3_2 (ctx->opener_stacks[2]) @@ -217,6 +241,7 @@ struct MD_CTX_tag { #define DOLLAR_OPENERS (ctx->opener_stacks[15]) #define PIPE_OPENERS (ctx->opener_stacks[16]) #define CARET_OPENERS (ctx->opener_stacks[17]) +#define EQUAL_OPENERS (ctx->opener_stacks[18]) /* Stack of dummies which need to call free() for pointers stored in them. * These are constructed during inline parsing and freed after all the block @@ -290,7 +315,6 @@ struct MD_LINE_ANALYSIS_tag { unsigned indent; /* Indentation level. */ }; -typedef struct MD_LINE_tag MD_LINE; struct MD_LINE_tag { OFF beg; OFF end; @@ -350,8 +374,10 @@ struct MD_VERBATIMLINE_tag { #if defined MD4C_USE_UTF16 #define md_strchr wcschr + #define md_strlen wcslen #else #define md_strchr strchr + #define md_strlen strlen #endif @@ -979,6 +1005,7 @@ struct MD_UNICODE_FOLD_INFO_tag { static inline unsigned md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size) { + MD_UNUSED(str_size); *p_size = 1; return (unsigned) str[off]; } @@ -1301,7 +1328,7 @@ md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OF if(off + open_size >= lines[0].end) return FALSE; - if(memcmp(STR(off), open_str, open_size) != 0) + if(memcmp(STR(off), open_str, open_size * sizeof(CHAR)) != 0) return FALSE; off += open_size; @@ -1421,8 +1448,8 @@ struct MD_ATTRIBUTE_BUILD_tag { CHAR* text; MD_TEXTTYPE* substr_types; OFF* substr_offsets; - int substr_count; - int substr_alloc; + SZ substr_count; + SZ substr_alloc; MD_TEXTTYPE trivial_types[1]; OFF trivial_offsets[2]; }; @@ -1437,27 +1464,27 @@ md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build, if(build->substr_count >= build->substr_alloc) { MD_TEXTTYPE* new_substr_types; OFF* new_substr_offsets; - - build->substr_alloc = (build->substr_alloc > 0 + SZ new_alloc = (build->substr_alloc > 0 ? build->substr_alloc + build->substr_alloc / 2 : 8); + new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types, - build->substr_alloc * sizeof(MD_TEXTTYPE)); + new_alloc * sizeof(MD_TEXTTYPE)); if(new_substr_types == NULL) { MD_LOG("realloc() failed."); return -1; } + build->substr_types = new_substr_types; + /* Note +1 to reserve space for final offset (== raw_size). */ new_substr_offsets = (OFF*) realloc(build->substr_offsets, - (build->substr_alloc+1) * sizeof(OFF)); + (new_alloc+1) * sizeof(OFF)); if(new_substr_offsets == NULL) { MD_LOG("realloc() failed."); - free(new_substr_types); return -1; } - - build->substr_types = new_substr_types; build->substr_offsets = new_substr_offsets; + build->substr_alloc = new_alloc; } build->substr_types[build->substr_count] = type; @@ -1521,7 +1548,7 @@ md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size, while(raw_off < raw_size) { if(raw_text[raw_off] == _T('\0')) { MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off)); - memcpy(build->text + off, raw_text + raw_off, 1); + memcpy(build->text + off, raw_text + raw_off, sizeof(CHAR)); off++; raw_off++; continue; @@ -1532,7 +1559,7 @@ md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size, if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) { MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off)); - memcpy(build->text + off, raw_text + raw_off, ent_end - raw_off); + memcpy(build->text + off, raw_text + raw_off, (ent_end - raw_off) * sizeof(CHAR)); off += ent_end - raw_off; raw_off = ent_end; continue; @@ -1564,9 +1591,9 @@ md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size, } -/********************************************* - *** Dictionary of Reference Definitions *** - *********************************************/ +/************************************************** + *** Hashtable (with a text label as its key) *** + **************************************************/ #define MD_FNV1A_BASE 2166136261U #define MD_FNV1A_PRIME 16777619U @@ -1586,25 +1613,12 @@ md_fnv1a(unsigned base, const void* data, size_t n) return hash; } - -struct MD_REF_DEF_tag { - CHAR* label; - CHAR* title; - unsigned hash; - SZ label_size; - SZ title_size; - OFF dest_beg; - OFF dest_end; - unsigned char label_needs_free : 1; - unsigned char title_needs_free : 1; -}; - /* Label equivalence is quite complicated with regards to whitespace and case * folding. This complicates computing a hash of it as well as direct comparison * of two labels. */ static unsigned -md_link_label_hash(const CHAR* label, SZ size) +md_label_hash(const CHAR* label, SZ size) { unsigned hash = MD_FNV1A_BASE; OFF off; @@ -1635,8 +1649,8 @@ md_link_label_hash(const CHAR* label, SZ size) } static OFF -md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size, - MD_UNICODE_FOLD_INFO* fold_info) +md_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size, + MD_UNICODE_FOLD_INFO* fold_info) { unsigned codepoint; SZ char_size; @@ -1664,7 +1678,7 @@ md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size, } static int -md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size) +md_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size) { OFF a_off; OFF b_off; @@ -1682,11 +1696,11 @@ md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size /* If needed, load fold info for next char. */ if(a_fi_off >= a_fi.n_codepoints) { a_fi_off = 0; - a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi); + a_off = md_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi); } if(b_fi_off >= b_fi.n_codepoints) { b_fi_off = 0; - b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi); + b_off = md_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi); } cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off]; @@ -1700,155 +1714,165 @@ md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size return 0; } -typedef struct MD_REF_DEF_LIST_tag MD_REF_DEF_LIST; -struct MD_REF_DEF_LIST_tag { - int n_ref_defs; - int alloc_ref_defs; - MD_REF_DEF* ref_defs[]; /* Valid items always point into ctx->ref_defs[] */ +typedef struct MD_LABEL_HASH_LIST_tag MD_LABEL_HASH_LIST; +struct MD_LABEL_HASH_LIST_tag { + unsigned n_entries; + unsigned alloc_entries; + MD_LABEL_HASH_ENTRY* entries[]; }; static int -md_ref_def_cmp(const void* a, const void* b) +md_label_hash_entry_cmp(const void* a, const void* b) { - const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a; - const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b; + const MD_LABEL_HASH_ENTRY* a_entry = *(const MD_LABEL_HASH_ENTRY**)a; + const MD_LABEL_HASH_ENTRY* b_entry = *(const MD_LABEL_HASH_ENTRY**)b; - if(a_ref->hash < b_ref->hash) + if(a_entry->hash < b_entry->hash) return -1; - else if(a_ref->hash > b_ref->hash) + else if(a_entry->hash > b_entry->hash) return +1; else - return md_link_label_cmp(a_ref->label, a_ref->label_size, b_ref->label, b_ref->label_size); + return md_label_cmp(a_entry->label, a_entry->label_size, + b_entry->label, b_entry->label_size); } static int -md_ref_def_cmp_for_sort(const void* a, const void* b) +md_label_hash_entry_cmp_for_sort(const void* a, const void* b) { int cmp; - cmp = md_ref_def_cmp(a, b); + cmp = md_label_hash_entry_cmp(a, b); /* Ensure stability of the sorting. */ if(cmp == 0) { - const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a; - const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b; + const MD_LABEL_HASH_ENTRY* a_entry = *(const MD_LABEL_HASH_ENTRY**)a; + const MD_LABEL_HASH_ENTRY* b_entry = *(const MD_LABEL_HASH_ENTRY**)b; - if(a_ref < b_ref) + if(a_entry < b_entry) cmp = -1; - else if(a_ref > b_ref) + else if(a_entry > b_entry) cmp = +1; - else - cmp = 0; } return cmp; } static int -md_build_ref_def_hashtable(MD_CTX* ctx) +md_is_complex_label_bucket(MD_LABEL_HASH_TABLE* table, void* bucket) +{ + if(bucket == NULL) + return FALSE; + + return (MD_LABEL_HASH_ENTRY*) bucket < (MD_LABEL_HASH_ENTRY*) table->defs || + (MD_LABEL_HASH_ENTRY*) bucket >= (MD_LABEL_HASH_ENTRY*)((char*)table->defs + table->n_defs * table->def_size); +} + +static int +md_build_label_hashtable(MD_CTX* ctx, MD_LABEL_HASH_TABLE* table) { - int i, j; + unsigned i, j; - if(ctx->n_ref_defs == 0) + if(table->n_defs == 0) return 0; - ctx->ref_def_hashtable_size = (ctx->n_ref_defs * 5) / 4; - ctx->ref_def_hashtable = malloc(ctx->ref_def_hashtable_size * sizeof(void*)); - if(ctx->ref_def_hashtable == NULL) { + table->n_buckets = (table->n_defs * 5) / 4; + table->buckets = malloc(table->n_buckets * sizeof(void*)); + if(table->buckets == NULL) { MD_LOG("malloc() failed."); goto abort; } - memset(ctx->ref_def_hashtable, 0, ctx->ref_def_hashtable_size * sizeof(void*)); + memset(table->buckets, 0, table->n_buckets * sizeof(void*)); - /* Each member of ctx->ref_def_hashtable[] can be: + /* Each member of table->buckets[] can be: * -- NULL, - * -- pointer to the MD_REF_DEF in ctx->ref_defs[], or - * -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to - * such MD_REF_DEFs. + * -- pointer to the MD_LABEL_HASH_ENTRY, or + * -- pointer to a MD_LABEL_HASH_LIST, which holds multiple pointers to + * such entries. */ - for(i = 0; i < ctx->n_ref_defs; i++) { - MD_REF_DEF* def = &ctx->ref_defs[i]; - void* bucket; - MD_REF_DEF_LIST* list; - - def->hash = md_link_label_hash(def->label, def->label_size); - bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size]; - - if(bucket == NULL) { - /* The bucket is empty. Make it just point to the def. */ - ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def; + for(i = 0; i < table->n_defs; i++) { + MD_LABEL_HASH_ENTRY* entry = (MD_LABEL_HASH_ENTRY*) ((char*) table->defs + i * table->def_size); + void** p_bucket = &table->buckets[entry->hash % table->n_buckets]; + MD_LABEL_HASH_LIST* list; + + if(*p_bucket == NULL) { + /* The bucket is empty. Make it just point to the entry. */ + *p_bucket = entry; continue; } - if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) { - /* The bucket already contains one ref. def. Lets see whether it - * is the same label (ref. def. duplicate) or different one - * (hash conflict). */ - MD_REF_DEF* old_def = (MD_REF_DEF*) bucket; + if(!md_is_complex_label_bucket(table, *p_bucket)) + { + /* The bucket already contains one entry. Lets see whether it is the + * same label (duplicate) or different one (hash conflict). */ + MD_LABEL_HASH_ENTRY* old_entry = (MD_LABEL_HASH_ENTRY*) *p_bucket; - if(md_link_label_cmp(def->label, def->label_size, old_def->label, old_def->label_size) == 0) { - /* Duplicate label: Ignore this ref. def. */ + if(md_label_cmp(entry->label, entry->label_size, + old_entry->label, old_entry->label_size) == 0) { + /* Duplicate label: We may ignore this definition. */ continue; } - /* Make the bucket complex, i.e. able to hold more ref. defs. */ - list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 2 * sizeof(MD_REF_DEF*)); + /* Make the bucket complex, i.e. able to hold more entries. */ + list = (MD_LABEL_HASH_LIST*) malloc(sizeof(MD_LABEL_HASH_LIST) + + 2 * sizeof(MD_LABEL_HASH_ENTRY*)); if(list == NULL) { MD_LOG("malloc() failed."); goto abort; } - list->ref_defs[0] = old_def; - list->ref_defs[1] = def; - list->n_ref_defs = 2; - list->alloc_ref_defs = 2; - ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list; + list->entries[0] = old_entry; + list->entries[1] = entry; + list->n_entries = 2; + list->alloc_entries = 2; + *p_bucket = list; continue; } - /* Append the def to the complex bucket list. + /* Append the entry to the complex bucket list. * * Note in this case we ignore potential duplicates to avoid expensive * iterating over the complex bucket. Below, we revisit all the complex * buckets and handle it more cheaply after the complex bucket contents * is sorted. */ - list = (MD_REF_DEF_LIST*) bucket; - if(list->n_ref_defs >= list->alloc_ref_defs) { - int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / 2; - MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(list, - sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*)); + list = (MD_LABEL_HASH_LIST*) *p_bucket; + if(list->n_entries >= list->alloc_entries) { + size_t alloc_entries = list->alloc_entries + list->alloc_entries / 2; + MD_LABEL_HASH_LIST* list_tmp = (MD_LABEL_HASH_LIST*) realloc(list, + sizeof(MD_LABEL_HASH_LIST) + alloc_entries * sizeof(MD_LABEL_HASH_ENTRY*)); if(list_tmp == NULL) { MD_LOG("realloc() failed."); goto abort; } list = list_tmp; - list->alloc_ref_defs = alloc_ref_defs; - ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list; + list->alloc_entries = alloc_entries; + table->buckets[entry->hash % table->n_buckets] = list; } - list->ref_defs[list->n_ref_defs] = def; - list->n_ref_defs++; + list->entries[list->n_entries] = entry; + list->n_entries++; } /* Sort the complex buckets so we can use bsearch() with them. */ - for(i = 0; i < ctx->ref_def_hashtable_size; i++) { - void* bucket = ctx->ref_def_hashtable[i]; - MD_REF_DEF_LIST* list; + for(i = 0; i < table->n_buckets; i++) { + void* bucket = table->buckets[i]; + MD_LABEL_HASH_LIST* list; if(bucket == NULL) continue; - if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) + + if(!md_is_complex_label_bucket(table, bucket)) continue; - list = (MD_REF_DEF_LIST*) bucket; - qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_for_sort); + list = (MD_LABEL_HASH_LIST*) bucket; + qsort(list->entries, list->n_entries, sizeof(MD_LABEL_HASH_ENTRY*), + md_label_hash_entry_cmp_for_sort); /* Disable all duplicates in the complex bucket by forcing all such - * records to point to the 1st such ref. def. I.e. no matter which + * records to point to the 1st such entry. I.e. no matter which * record is found during the lookup, it will always point to the right - * ref. def. in ctx->ref_defs[]. */ - for(j = 1; j < list->n_ref_defs; j++) { - if(md_ref_def_cmp(&list->ref_defs[j-1], &list->ref_defs[j]) == 0) - list->ref_defs[j] = list->ref_defs[j-1]; + * definition. */ + for(j = 1; j < list->n_entries; j++) { + if(md_label_hash_entry_cmp(&list->entries[j-1], &list->entries[j]) == 0) + list->entries[j] = list->entries[j-1]; } } @@ -1859,57 +1883,59 @@ md_build_ref_def_hashtable(MD_CTX* ctx) } static void -md_free_ref_def_hashtable(MD_CTX* ctx) +md_free_label_hashtable(MD_CTX* ctx, MD_LABEL_HASH_TABLE* table) { - if(ctx->ref_def_hashtable != NULL) { - int i; + MD_UNUSED(ctx); - for(i = 0; i < ctx->ref_def_hashtable_size; i++) { - void* bucket = ctx->ref_def_hashtable[i]; - if(bucket == NULL) - continue; - if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) - continue; - free(bucket); + if(table->buckets != NULL) { + unsigned i; + + for(i = 0; i < table->n_buckets; i++) { + void* bucket = table->buckets[i]; + if(md_is_complex_label_bucket(table, bucket)) + free(bucket); } - free(ctx->ref_def_hashtable); + free(table->buckets); } } -static const MD_REF_DEF* -md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size) +static const MD_LABEL_HASH_ENTRY* +md_lookup_label_hashtable(MD_CTX* ctx, MD_LABEL_HASH_TABLE* table, const CHAR* label, SZ label_size) { unsigned hash; void* bucket; - if(ctx->ref_def_hashtable_size == 0) + MD_UNUSED(ctx); + + if(table->n_buckets == 0) return NULL; - hash = md_link_label_hash(label, label_size); - bucket = ctx->ref_def_hashtable[hash % ctx->ref_def_hashtable_size]; + hash = md_label_hash(label, label_size); + bucket = table->buckets[hash % table->n_buckets]; if(bucket == NULL) { return NULL; - } else if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) { - const MD_REF_DEF* def = (MD_REF_DEF*) bucket; + } else if(!md_is_complex_label_bucket(table, bucket)) { + MD_LABEL_HASH_ENTRY* entry = (MD_LABEL_HASH_ENTRY*) bucket; - if(md_link_label_cmp(def->label, def->label_size, label, label_size) == 0) - return def; + if(entry->hash == hash && + md_label_cmp(entry->label, entry->label_size, label, label_size) == 0) + return entry; else return NULL; } else { - MD_REF_DEF_LIST* list = (MD_REF_DEF_LIST*) bucket; - MD_REF_DEF key_buf; - const MD_REF_DEF* key = &key_buf; - const MD_REF_DEF** ret; + MD_LABEL_HASH_LIST* list = (MD_LABEL_HASH_LIST*) bucket; + MD_LABEL_HASH_ENTRY key_buf; + const MD_LABEL_HASH_ENTRY* key = &key_buf; + MD_LABEL_HASH_ENTRY** ret; key_buf.label = (CHAR*) label; key_buf.label_size = label_size; - key_buf.hash = md_link_label_hash(key_buf.label, key_buf.label_size); + key_buf.hash = hash; - ret = (const MD_REF_DEF**) bsearch(&key, list->ref_defs, - list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp); + ret = (MD_LABEL_HASH_ENTRY**) bsearch(&key, list->entries, + list->n_entries, sizeof(MD_LABEL_HASH_ENTRY*), md_label_hash_entry_cmp); if(ret != NULL) return *ret; else @@ -1917,6 +1943,212 @@ md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size) } } +static void* +md_add_label_def(MD_CTX* ctx, MD_LABEL_HASH_TABLE* table, const CHAR* label, SZ label_size) +{ + MD_LABEL_HASH_ENTRY* entry; + + if(table->n_defs >= table->alloc_defs) { + size_t new_alloc_defs; + void* new_defs; + + new_alloc_defs = (table->alloc_defs > 0 + ? table->alloc_defs + table->alloc_defs / 2 + : 8); + new_defs = realloc(table->defs, new_alloc_defs * table->def_size); + if(new_defs == NULL) { + MD_LOG("realloc() failed."); + return NULL; + } + + table->defs = new_defs; + table->alloc_defs = new_alloc_defs; + } + + entry = (MD_LABEL_HASH_ENTRY*)((char*)table->defs + table->n_defs * table->def_size); + memset(entry, 0, table->def_size); + entry->label = label; + entry->label_size = label_size; + entry->hash = md_label_hash(label, label_size); + + table->n_defs++; + + return entry; +} + + +/************************************ + *** Link Reference Definitions *** + ************************************/ + +struct MD_REF_DEF_tag { + MD_LABEL_HASH_ENTRY entry; + CHAR* title; + SZ title_size; + OFF dest_beg; + OFF dest_end; + unsigned char label_needs_free : 1; + unsigned char title_needs_free : 1; +}; + +static int +md_build_ref_def_hashtable(MD_CTX* ctx) +{ + return md_build_label_hashtable(ctx, &ctx->ref_def_hashtable); +} + +static const MD_REF_DEF* +md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size) +{ + return (const MD_REF_DEF*) md_lookup_label_hashtable(ctx, + &ctx->ref_def_hashtable, label, label_size); +} + + +/******************************* + *** Footnote Definitions *** + *******************************/ + +typedef struct MD_FOOTNOTE_DEF_tag MD_FOOTNOTE_DEF; +struct MD_FOOTNOTE_DEF_tag { + MD_LABEL_HASH_ENTRY entry; + unsigned int index; /* 0 = unreferenced; 1-based order of first reference */ + unsigned int ref_count; /* Number of references to this footnote. */ + MD_LINE* content_lines; /* always heap-allocated; freed by md_free_footnote_defs */ + MD_SIZE n_content_lines; +}; + +/* Returns 0 if not a footnote definition. + * Returns N > 0 (number of lines consumed) if it is one and the definition + * was stored successfully. + * Returns -1 on memory allocation error. + */ +static int +md_is_footnote_definition(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines) +{ + OFF off; + OFF label_beg, label_end; + MD_LINE* content_lines = NULL; + MD_FOOTNOTE_DEF* def; + MD_SIZE n; + MD_SIZE n_content_lines; + int ret = 0; + + /* Caller guarantees: n_lines >= 1 and lines[0] starts with [^. */ + MD_ASSERT(n_lines >= 1); + off = lines[0].beg; + MD_ASSERT(CH(off) == _T('[') && CH(off+1) == _T('^')); + off += 2; + + /* Label: non-empty sequence of non-whitespace, non-bracket chars. */ + label_beg = off; + while(off < lines[0].end && CH(off) != _T(']') && !ISWHITESPACE(off) && CH(off) != _T('[')) + off++; + label_end = off; + if(label_end == label_beg) + return FALSE; + + /* Closing bracket. */ + if(off >= lines[0].end || CH(off) != _T(']')) + return FALSE; + off++; + + /* Colon. */ + if(off >= lines[0].end || CH(off) != _T(':')) + return FALSE; + off++; + + /* Skip optional whitespace after colon on the first line. */ + while(off < lines[0].end && ISWHITESPACE(off)) + off++; + + /* Count continuation lines. GitHub-style footnotes allow the rest of the + * paragraph block to form the footnote body, including unindented lines. + * + * Blank lines cannot appear inside a paragraph block (they always trigger + * a block boundary), so we do not need to handle them here. Stop before a + * following line which itself starts a new footnote definition. */ + n = 1; + while(n < n_lines) { + OFF def_off = lines[n].beg; + + if(def_off + 3 < lines[n].end && CH(def_off) == _T('[') && CH(def_off+1) == _T('^')) { + OFF tmp = def_off + 2; + + while(tmp < lines[n].end && CH(tmp) != _T(']') && + !ISWHITESPACE(tmp) && CH(tmp) != _T('[')) + tmp++; + + if(tmp > def_off + 2 && tmp + 1 < lines[n].end && + CH(tmp) == _T(']') && CH(tmp+1) == _T(':')) + break; + } + + n++; + } + + /* Build content_lines array. + * Line 0 content starts after the "[^label]: " prefix. + * Lines 1..n-1 are stored verbatim (md4c strips indentation before + * handing us MD_LINE, so no further adjustment is needed). */ + n_content_lines = (off >= lines[0].end && n > 1) ? n - 1 : n; + content_lines = (MD_LINE*) malloc(n_content_lines * sizeof(MD_LINE)); + if(content_lines == NULL) { + MD_LOG("malloc() failed."); + ret = -1; + goto abort; + } + + if(n_content_lines < n) { + memcpy(content_lines, lines + 1, n_content_lines * sizeof(MD_LINE)); + } else { + content_lines[0].beg = off; + content_lines[0].end = lines[0].end; + if(n > 1) + memcpy(content_lines + 1, lines + 1, (n - 1) * sizeof(MD_LINE)); + } + + def = (MD_FOOTNOTE_DEF*) md_add_label_def(ctx, &ctx->footnote_hashtable, + (CHAR*) STR(label_beg), label_end - label_beg); + if(def == NULL) { + ret = -1; + goto abort; + } + def->content_lines = content_lines; + def->n_content_lines = n_content_lines; + + return (int) n; + +abort: + free(content_lines); + return ret; +} + +static MD_FOOTNOTE_DEF* +md_lookup_footnote_def(MD_CTX* ctx, const CHAR* label, SZ label_size) +{ + return (MD_FOOTNOTE_DEF*) md_lookup_label_hashtable(ctx, + &ctx->footnote_hashtable, label, label_size); +} + +static int +md_build_footnote_def_hashtable(MD_CTX* ctx) +{ + return md_build_label_hashtable(ctx, &ctx->footnote_hashtable); +} + +static void +md_free_footnote_defs(MD_CTX* ctx) +{ + unsigned i; + + md_free_label_hashtable(ctx, &ctx->footnote_hashtable); + + for(i = 0; i < ctx->footnote_hashtable.n_defs; i++) + free(ctx->footnote_hashtable.footnote_defs[i].content_lines); + free(ctx->footnote_hashtable.footnote_defs); +} + /*************************** *** Recognizing Links *** @@ -2232,32 +2464,24 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lin if(off < lines[line_index].end) return FALSE; - /* So, it _is_ a reference definition. Remember it. */ - if(ctx->n_ref_defs >= ctx->alloc_ref_defs) { - MD_REF_DEF* new_defs; - - ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0 - ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2 - : 16); - new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF)); - if(new_defs == NULL) { - MD_LOG("realloc() failed."); - goto abort; - } - - ctx->ref_defs = new_defs; - } - def = &ctx->ref_defs[ctx->n_ref_defs]; - memset(def, 0, sizeof(MD_REF_DEF)); - if(label_is_multiline) { + CHAR* label; + SZ label_size; + MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end, lines + label_contents_line_index, n_lines - label_contents_line_index, - _T(' '), &def->label, &def->label_size)); + _T(' '), &label, &label_size)); + def = (MD_REF_DEF*) md_add_label_def(ctx, &ctx->ref_def_hashtable, label, label_size); + if(def == NULL) { + free(label); + goto abort; + } def->label_needs_free = TRUE; } else { - def->label = (CHAR*) STR(label_contents_beg); - def->label_size = label_contents_end - label_contents_beg; + def = (MD_REF_DEF*) md_add_label_def(ctx, &ctx->ref_def_hashtable, + STR(label_contents_beg), label_contents_end - label_contents_beg); + if(def == NULL) + goto abort; } if(title_is_multiline) { @@ -2274,13 +2498,12 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lin def->dest_end = dest_contents_end; /* Success. */ - ctx->n_ref_defs++; return line_index + 1; abort: /* Failure. */ if(def != NULL && def->label_needs_free) - free(def->label); + free((CHAR*) def->entry.label); if(def != NULL && def->title_needs_free) free(def->title); return ret; @@ -2332,7 +2555,7 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, if(def != NULL) { /* See https://github.com/mity/md4c/issues/238 */ - MD_SIZE output_size_estimation = def->label_size + def->title_size + def->dest_end - def->dest_beg; + MD_SIZE output_size_estimation = def->entry.label_size + def->title_size + def->dest_end - def->dest_beg; if(output_size_estimation < ctx->max_ref_def_output) { ctx->max_ref_def_output -= output_size_estimation; ret = TRUE; @@ -2445,18 +2668,20 @@ md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, static void md_free_ref_defs(MD_CTX* ctx) { - int i; + unsigned i; + + md_free_label_hashtable(ctx, &ctx->ref_def_hashtable); - for(i = 0; i < ctx->n_ref_defs; i++) { - MD_REF_DEF* def = &ctx->ref_defs[i]; + for(i = 0; i < ctx->ref_def_hashtable.n_defs; i++) { + MD_REF_DEF* def = &ctx->ref_def_hashtable.ref_defs[i]; if(def->label_needs_free) - free(def->label); + free((void*) def->entry.label); if(def->title_needs_free) free(def->title); } - free(ctx->ref_defs); + free(ctx->ref_def_hashtable.ref_defs); } @@ -2509,6 +2734,7 @@ md_free_ref_defs(MD_CTX* ctx) * ';': Maybe end of entity. * '<': Maybe start of raw HTML or autolink. * '>': Maybe end of raw HTML or autolink. + * '=': Maybe highlight start/end (needs MD_FLAG_HIGHLIGHT). * '[': Maybe start of link label or link text. * '!': Equivalent of '[' for image. * ']': Maybe end of link label or link text. @@ -2532,7 +2758,7 @@ struct MD_MARK_tag { OFF beg; OFF end; }; - void* pointer; // Dummy marks can sometimes store a pointer + void* pointer; /* Dummy marks can sometimes store a pointer */ }; /* For unresolved openers, 'next' may be used to form a stack of @@ -2554,16 +2780,18 @@ struct MD_MARK_tag { #define MD_MARK_CLOSER 0x08 /* Definitely closer. */ #define MD_MARK_RESOLVED 0x10 /* Resolved in any definite way. */ -/* Mark flags specific for various mark types (so they can share bits). */ +/* Mark flags specific for just some mark type(s) (so the bits can be reused). */ #define MD_MARK_EMPH_OC 0x20 /* Opener/closer mixed candidate. Helper for the "rule of 3". */ #define MD_MARK_EMPH_MOD3_0 0x40 #define MD_MARK_EMPH_MOD3_1 0x80 #define MD_MARK_EMPH_MOD3_2 (0x40 | 0x80) #define MD_MARK_EMPH_MOD3_MASK (0x40 | 0x80) #define MD_MARK_AUTOLINK 0x20 /* Distinguisher for '<', '>'. */ -#define MD_MARK_AUTOLINK_MISSING_MAILTO 0x40 -#define MD_MARK_VALIDPERMISSIVEAUTOLINK 0x20 /* For permissive autolinks. */ -#define MD_MARK_HASNESTEDBRACKETS 0x20 /* For '[' to rule out invalid link labels early */ +#define MD_MARK_AUTOLINK_MISSING_MAILTO 0x40 /* For '@' */ +#define MD_MARK_VALIDPERMISSIVEAUTOLINK 0x20 /* For '@', ':', '.'. */ +#define MD_MARK_BRACKET_CANBEIMAGE 0x20 /* For '[', if can be expanded to the left to eat '!'. */ +#define MD_MARK_BRACKET_HASNESTED 0x40 /* For '[' to rule out invalid link labels early. */ +#define MD_MARK_BRACKET_FOOTNOTEREF 0x80 /* For '[', To distinguish footnotes. */ static MD_MARKSTACK* md_emph_stack(MD_CTX* ctx, MD_CHAR ch, unsigned flags) @@ -2715,8 +2943,10 @@ md_disable_marks(MD_CTX* ctx, int mark_index0, int mark_index1) int i; for(i = mark_index0; i < mark_index1; i++) { - ctx->marks[i].ch = 'D'; - ctx->marks[i].flags &= ~MD_MARK_RESOLVED; + MD_MARK* mark = &ctx->marks[i]; + + mark->ch = 'D'; + mark->flags = 0; } } @@ -2747,6 +2977,9 @@ md_build_mark_char_map(MD_CTX* ctx) if(ctx->parser.flags & MD_FLAG_LATEXMATHSPANS) ctx->mark_char_map['$'] = 1; + if(ctx->parser.flags & MD_FLAG_HIGHLIGHT) + ctx->mark_char_map['='] = 1; + if(ctx->parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS) ctx->mark_char_map['@'] = 1; @@ -2900,7 +3133,7 @@ md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end) MD_ASSERT(CH(beg) == _T('<')); /* Check for scheme. */ - if(off >= max_end || !ISASCII(off)) + if(off >= max_end || !ISALNUM(off)) return FALSE; off++; while(1) { @@ -3202,11 +3435,16 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, int table_m continue; } - /* A potential link or its part. */ + /* A potential link, footnote and similar or its part. */ if(ch == _T('[') || (ch == _T('!') && off+1 < line->end && CH(off+1) == _T('['))) { - OFF tmp = (ch == _T('[') ? off+1 : off+2); - ADD_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER); - off = tmp; + if(ch == _T('!')) { + ADD_MARK(ch, off+1, off+2, MD_MARK_POTENTIAL_OPENER | MD_MARK_BRACKET_CANBEIMAGE); + off += 2; + } else { + ADD_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER); + off += 1; + } + /* Two dummies to make enough place for data we need if it is * a link. */ ADD_MARK('D', off, off, 0); @@ -3327,6 +3565,30 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, int table_m continue; } + /* A potential highlight start/end: ==text== */ + if(ch == _T('=') && (ctx->parser.flags & MD_FLAG_HIGHLIGHT)) { + OFF tmp = off + 1; + + while(tmp < line->end && CH(tmp) == _T('=')) + tmp++; + + /* Only exactly two equals signs form a highlight delimiter. */ + if(tmp - off == 2) { + unsigned flags = MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER; + + /* Cannot open before whitespace; cannot close after whitespace. */ + if(tmp >= line->end || ISUNICODEWHITESPACE(tmp)) + flags &= ~MD_MARK_POTENTIAL_OPENER; + if(off == line->beg || ISUNICODEWHITESPACEBEFORE(off)) + flags &= ~MD_MARK_POTENTIAL_CLOSER; + if(flags != 0) + ADD_MARK(ch, off, tmp, flags); + } + + off = tmp; + continue; + } + /* A potential strikethrough/subscript start/end. */ if(ch == _T('~')) { OFF tmp = off + 1; @@ -3429,14 +3691,14 @@ md_analyze_bracket(MD_CTX* ctx, int mark_index) * link in the right order, from inside to outside in case of nested * brackets. * - * The resolving itself is deferred to md_resolve_links(). + * The resolving itself is deferred to md_resolve_brackets(). */ MD_MARK* mark = &ctx->marks[mark_index]; if(mark->flags & MD_MARK_POTENTIAL_OPENER) { if(BRACKET_OPENERS.top >= 0) - ctx->marks[BRACKET_OPENERS.top].flags |= MD_MARK_HASNESTEDBRACKETS; + ctx->marks[BRACKET_OPENERS.top].flags |= MD_MARK_BRACKET_HASNESTED; md_mark_stack_push(ctx, &BRACKET_OPENERS, mark_index); return; @@ -3450,7 +3712,7 @@ md_analyze_bracket(MD_CTX* ctx, int mark_index) opener->next = mark_index; mark->prev = opener_index; - /* Add the pair into a list of potential links for md_resolve_links(). + /* Add the pair into a list of potential brackets for md_resolve_brackets(). * Note we misuse opener->prev for this as opener->next points to its * closer. */ if(ctx->unresolved_link_tail >= 0) @@ -3466,265 +3728,364 @@ md_analyze_bracket(MD_CTX* ctx, int mark_index) static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, int mark_beg, int mark_end); +/* Try to resolve a bracket pair as a wiki link '[[destination]]' or + * '[[destination|label]]'. + * Returns TRUE if resolved, FALSE if not a wiki link, -1 on error. */ static int -md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines) +md_resolve_bracket_wikilink(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, + int opener_index, int closer_index, + MD_MARK* opener, MD_MARK* closer, + MD_MARK* next_opener, MD_MARK* next_closer, + OFF* last_link_beg, OFF* last_link_end, + int* p_opener_index) { - int opener_index = ctx->unresolved_link_head; - OFF last_link_beg = 0; - OFF last_link_end = 0; - OFF last_img_beg = 0; - OFF last_img_end = 0; + MD_MARK* delim = NULL; + int delim_index; + OFF dest_beg, dest_end; + OFF off; - while(opener_index >= 0) { - MD_MARK* opener = &ctx->marks[opener_index]; - int closer_index = opener->next; - MD_MARK* closer = &ctx->marks[closer_index]; - int next_index = opener->prev; - MD_MARK* next_opener; - MD_MARK* next_closer; - MD_LINK_ATTR attr; - int is_link = FALSE; + if(!(ctx->parser.flags & MD_FLAG_WIKILINKS)) + return FALSE; - if(next_index >= 0) { - next_opener = &ctx->marks[next_index]; - next_closer = &ctx->marks[next_opener->next]; - } else { - next_opener = NULL; - next_closer = NULL; - } + if(opener->ch != _T('[') || opener->end - opener->beg != 1 || + next_opener == NULL || next_opener->ch != _T('[') || next_opener->end - next_opener->beg != 1 || + next_closer == NULL || next_closer->ch != _T(']') || next_closer->end - next_closer->beg != 1) + return FALSE; - /* If nested ("[ [ ] ]"), we need to make sure that: - * - The outer does not end inside of (...) belonging to the inner. - * - The outer cannot be link if the inner is link (i.e. not image). - * - * (Note we here analyze from inner to outer as the marks are ordered - * by closer->beg.) - */ - if((opener->beg < last_link_beg && closer->end < last_link_end) || - (opener->beg < last_img_beg && closer->end < last_img_end) || - (opener->beg < last_link_end && opener->ch == '[')) - { - opener_index = next_index; - continue; + /* Check that the next_opener and next_closer are nested properly. */ + if(next_opener->beg != opener->beg - 1 || next_closer->beg != closer->beg + 1) + return FALSE; + + /* We don't allow destination to be longer than 100 characters. + * Lets scan to see whether there is '|'. (If not then the whole + * wiki-link has to be below the 100 characters.) */ + delim_index = opener_index + 1; + while(delim_index < closer_index) { + MD_MARK* m = &ctx->marks[delim_index]; + if(m->ch == _T('|')) { + delim = m; + break; + } + if(m->ch != _T('D')) { + if(m->beg - opener->end > 100) + break; + if(m->ch != _T('D') && (m->flags & MD_MARK_OPENER)) + delim_index = m->next; } + delim_index++; + } - /* Recognize and resolve wiki links. - * Wiki-links maybe '[[destination]]' or '[[destination|label]]'. - */ - if ((ctx->parser.flags & MD_FLAG_WIKILINKS) && - (opener->end - opener->beg == 1) && /* not image */ - next_opener != NULL && /* double '[' opener */ - next_opener->ch == '[' && - (next_opener->beg == opener->beg - 1) && - (next_opener->end - next_opener->beg == 1) && - next_closer != NULL && /* double ']' closer */ - next_closer->ch == ']' && - (next_closer->beg == closer->beg + 1) && - (next_closer->end - next_closer->beg == 1)) - { - MD_MARK* delim = NULL; - int delim_index; - OFF dest_beg, dest_end; - - is_link = TRUE; - - /* We don't allow destination to be longer than 100 characters. - * Lets scan to see whether there is '|'. (If not then the whole - * wiki-link has to be below the 100 characters.) */ - delim_index = opener_index + 1; - while(delim_index < closer_index) { - MD_MARK* m = &ctx->marks[delim_index]; - if(m->ch == '|') { - delim = m; - break; - } - if(m->ch != 'D') { - if(m->beg - opener->end > 100) - break; - if(m->ch != 'D' && (m->flags & MD_MARK_OPENER)) - delim_index = m->next; - } - delim_index++; - } + dest_beg = opener->end; + dest_end = (delim != NULL) ? delim->beg : closer->beg; + if(dest_end - dest_beg == 0 || dest_end - dest_beg > 100) + return FALSE; - dest_beg = opener->end; - dest_end = (delim != NULL) ? delim->beg : closer->beg; - if(dest_end - dest_beg == 0 || dest_end - dest_beg > 100) - is_link = FALSE; + /* There may not be any new line in the destination. */ + for(off = dest_beg; off < dest_end; off++) { + if(ISNEWLINE(off)) + return FALSE; + } - /* There may not be any new line in the destination. */ - if(is_link) { - OFF off; - for(off = dest_beg; off < dest_end; off++) { - if(ISNEWLINE(off)) { - is_link = FALSE; - break; - } - } - } + md_pop_openers(ctx, opener_index); - if(is_link) { - md_pop_openers(ctx, opener_index); + if(delim != NULL) { + if(delim->end < closer->beg) { + md_disable_marks(ctx, opener_index+1, delim_index); + delim->flags |= MD_MARK_RESOLVED; + opener->end = delim->beg; + } else { + /* The pipe is just before the closer: [[foo|]] */ + md_disable_marks(ctx, opener_index+1, closer_index); + closer->beg = delim->beg; + } + } - if(delim != NULL) { - if(delim->end < closer->beg) { - md_disable_marks(ctx, delim_index+1, delim_index); - delim->flags |= MD_MARK_RESOLVED; - opener->end = delim->beg; - } else { - /* The pipe is just before the closer: [[foo|]] */ - md_disable_marks(ctx, opener_index+1, closer_index); - closer->beg = delim->beg; - } - } + opener->beg = next_opener->beg; + closer->end = next_closer->end; + md_resolve_range(ctx, opener_index, closer_index); + + *last_link_beg = opener->beg; + *last_link_end = closer->end; + + if(delim != NULL) + md_analyze_link_contents(ctx, lines, n_lines, delim_index+1, closer_index); - opener->beg = next_opener->beg; - opener->next = closer_index; - opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; + *p_opener_index = next_opener->prev; + return TRUE; +} - closer->end = next_closer->end; - closer->prev = opener_index; - closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; +/* Resolve footnote references [^label] in the current block. */ +static int +md_resolve_bracket_footnote(MD_CTX* ctx, MD_MARK* opener, MD_MARK* closer, + OFF* last_link_beg, OFF* last_link_end, + int* p_opener_index) +{ + MD_MARK* index_mark; + MD_FOOTNOTE_DEF* def; + OFF label_beg, label_end; - last_link_beg = opener->beg; - last_link_end = closer->end; + if(!(ctx->parser.flags & MD_FLAG_FOOTNOTES)) + return FALSE; + if(opener->ch != _T('[') || opener->end >= ctx->size || CH(opener->end) != _T('^')) + return FALSE; - if(delim != NULL) - md_analyze_link_contents(ctx, lines, n_lines, delim_index+1, closer_index); + /* Expand the opener to eat the '^' */ + opener->end++; - opener_index = next_opener->prev; - continue; - } + closer = &ctx->marks[opener->next]; + + /* Label is the raw text between the opener end and the closer begin. + * opener->end points past [^, closer->beg points to ]. */ + label_beg = opener->end; + label_end = closer->beg; + + if(label_beg >= label_end) + return FALSE; /* empty label */ + + def = md_lookup_footnote_def(ctx, STR(label_beg), label_end - label_beg); + if(def == NULL) + return FALSE; + + /* Assign index on first reference. */ + if(def->index == 0) + def->index = ++ctx->next_footnote_index; + def->ref_count++; + + /* Store the public callback details in the dummy mark after the opener. */ + index_mark = opener + 1; + MD_ASSERT(index_mark->ch == _T('D')); + index_mark->beg = def->index; + index_mark->end = def->ref_count; + + /* Mark as resolved. */ + opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED | MD_MARK_BRACKET_FOOTNOTEREF; + closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED | MD_MARK_BRACKET_FOOTNOTEREF; + *last_link_beg = opener->beg; + *last_link_end = closer->end; + + *p_opener_index = opener->prev; + return TRUE; +} + +/* Try to resolve a bracket pair as a CommonMark link or image. + * Returns -1 on error, 0 otherwise. */ +static int +md_resolve_bracket_link(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, + int opener_index, int closer_index, + MD_MARK* opener, MD_MARK* closer, + MD_MARK* next_opener, MD_MARK* next_closer, + int* p_next_index, + OFF* last_link_beg, OFF* last_link_end, + OFF* last_img_beg, OFF* last_img_end) +{ + MD_LINK_ATTR attr; + int is_link = FALSE; + + if(next_opener != NULL && next_opener->beg == closer->end) { + if(next_closer->beg > closer->end + 1) { + /* Might be full reference link. */ + if(!(next_opener->flags & MD_MARK_BRACKET_HASNESTED)) + is_link = md_is_link_reference(ctx, lines, n_lines, next_opener->beg, next_closer->end, &attr); + } else { + /* Might be shortcut reference link. */ + if(!(opener->flags & MD_MARK_BRACKET_HASNESTED)) + is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr); } - if(next_opener != NULL && next_opener->beg == closer->end) { - if(next_closer->beg > closer->end + 1) { - /* Might be full reference link. */ - if(!(next_opener->flags & MD_MARK_HASNESTEDBRACKETS)) - is_link = md_is_link_reference(ctx, lines, n_lines, next_opener->beg, next_closer->end, &attr); - } else { - /* Might be shortcut reference link. */ - if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS)) - is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr); - } + if(is_link < 0) + return -1; + + if(is_link) { + /* Eat the 2nd "[...]". */ + closer->end = next_closer->end; + /* Do not analyze the label as a standalone link in the next + * iteration. */ + *p_next_index = ctx->marks[*p_next_index].prev; + } + } else { + if(closer->end < ctx->size && CH(closer->end) == _T('(')) { + /* Might be inline link. */ + OFF inline_link_end = UINT_MAX; + int following_mark_index = closer_index + 1; + + is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer->end, &inline_link_end, &attr); if(is_link < 0) return -1; + /* Check the closing ')' is not inside an already resolved range + * (i.e. a range with a higher priority), e.g. a code span. */ if(is_link) { - /* Eat the 2nd "[...]". */ - closer->end = next_closer->end; + while(following_mark_index < ctx->n_marks) { + MD_MARK* mark = &ctx->marks[following_mark_index]; - /* Do not analyze the label as a standalone link in the next - * iteration. */ - next_index = ctx->marks[next_index].prev; - } - } else { - if(closer->end < ctx->size && CH(closer->end) == _T('(')) { - /* Might be inline link. */ - OFF inline_link_end = UINT_MAX; - int following_mark_index = closer_index + 1; - - is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer->end, &inline_link_end, &attr); - if(is_link < 0) - return -1; - - /* Check the closing ')' is not inside an already resolved range - * (i.e. a range with a higher priority), e.g. a code span. */ - if(is_link) { - while(following_mark_index < ctx->n_marks) { - MD_MARK* mark = &ctx->marks[following_mark_index]; - - if(mark->beg >= inline_link_end) + if(mark->beg >= inline_link_end) + break; + if((mark->flags & (MD_MARK_OPENER | MD_MARK_RESOLVED)) == (MD_MARK_OPENER | MD_MARK_RESOLVED)) { + if(ctx->marks[mark->next].beg >= inline_link_end) { + /* Cancel the link status. */ + if(attr.title_needs_free) + free(attr.title); + is_link = FALSE; break; - if((mark->flags & (MD_MARK_OPENER | MD_MARK_RESOLVED)) == (MD_MARK_OPENER | MD_MARK_RESOLVED)) { - if(ctx->marks[mark->next].beg >= inline_link_end) { - /* Cancel the link status. */ - if(attr.title_needs_free) - free(attr.title); - is_link = FALSE; - break; - } - - following_mark_index = mark->next + 1; - } else { - following_mark_index++; } - } - } - if(is_link) { - /* Eat the "(...)" */ - closer->end = inline_link_end; - md_disable_marks(ctx, closer_index+1, following_mark_index); + following_mark_index = mark->next + 1; + } else { + following_mark_index++; + } } } - if(!is_link) { - /* Might be collapsed reference link. */ - if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS)) - is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr); - if(is_link < 0) - return -1; + if(is_link) { + /* Eat the "(...)" */ + closer->end = inline_link_end; + md_disable_marks(ctx, closer_index+1, following_mark_index); } } - if(is_link) { - /* Resolve the brackets as a link. */ - opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; - closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; - - /* If it is a link, we store the destination and title in the two - * dummy marks after the opener. */ - MD_ASSERT(ctx->marks[opener_index+1].ch == 'D'); - ctx->marks[opener_index+1].beg = attr.dest_beg; - ctx->marks[opener_index+1].end = attr.dest_end; - - MD_ASSERT(ctx->marks[opener_index+2].ch == 'D'); - md_mark_store_ptr(ctx, opener_index+2, attr.title); - /* The title might or might not have been allocated for us. */ - if(attr.title_needs_free) - md_mark_stack_push(ctx, &ctx->ptr_stack, opener_index+2); - ctx->marks[opener_index+2].prev = attr.title_size; - - if(opener->ch == '[') { - last_link_beg = opener->beg; - last_link_end = closer->end; - } else { - last_img_beg = opener->beg; - last_img_end = closer->end; - } + if(!is_link) { + /* Might be collapsed reference link. */ + if(!(opener->flags & MD_MARK_BRACKET_HASNESTED)) + is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr); + if(is_link < 0) + return -1; + } + } - md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index); + if(is_link) { + /* Resolve the brackets as a link. */ + opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; + closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; - /* If the link text is formed by nothing but permissive autolink, - * suppress the autolink. - * See https://github.com/mity/md4c/issues/152 for more info. */ - if(ctx->parser.flags & MD_FLAG_PERMISSIVEAUTOLINKS) { - MD_MARK* first_nested; - MD_MARK* last_nested; + /* If it is a link, we store the destination and title in the two + * dummy marks after the opener. */ + MD_ASSERT(ctx->marks[opener_index+1].ch == _T('D')); + ctx->marks[opener_index+1].beg = attr.dest_beg; + ctx->marks[opener_index+1].end = attr.dest_end; - first_nested = opener + 1; - while(first_nested->ch == _T('D') && first_nested < closer) - first_nested++; + MD_ASSERT(ctx->marks[opener_index+2].ch == _T('D')); + md_mark_store_ptr(ctx, opener_index+2, attr.title); + /* The title might or might not have been allocated for us. */ + if(attr.title_needs_free) + md_mark_stack_push(ctx, &ctx->ptr_stack, opener_index+2); + ctx->marks[opener_index+2].prev = attr.title_size; - last_nested = closer - 1; - while(first_nested->ch == _T('D') && last_nested > opener) - last_nested--; + if(opener->ch == _T('[')) { + *last_link_beg = opener->beg; + *last_link_end = closer->end; + } else { + *last_img_beg = opener->beg; + *last_img_end = closer->end; + } - if((first_nested->flags & MD_MARK_RESOLVED) && - first_nested->beg == opener->end && - ISANYOF_(first_nested->ch, _T("@:.")) && - first_nested->next == (last_nested - ctx->marks) && - last_nested->end == closer->beg) - { - first_nested->ch = _T('D'); - first_nested->flags &= ~MD_MARK_RESOLVED; - last_nested->ch = _T('D'); - last_nested->flags &= ~MD_MARK_RESOLVED; - } + md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index); + + /* If the link text is formed by nothing but permissive autolink, + * suppress the autolink. + * See https://github.com/mity/md4c/issues/152 for more info. */ + if(ctx->parser.flags & MD_FLAG_PERMISSIVEAUTOLINKS) { + MD_MARK* first_nested; + MD_MARK* last_nested; + + first_nested = opener + 1; + while(first_nested->ch == _T('D') && first_nested < closer) + first_nested++; + + last_nested = closer - 1; + while(first_nested->ch == _T('D') && last_nested > opener) + last_nested--; + + if((first_nested->flags & MD_MARK_RESOLVED) && + first_nested->beg == opener->end && + ISANYOF_(first_nested->ch, _T("@:.")) && + first_nested->next == (last_nested - ctx->marks) && + last_nested->end == closer->beg) + { + first_nested->ch = _T('D'); + first_nested->flags &= ~MD_MARK_RESOLVED; + last_nested->ch = _T('D'); + last_nested->flags &= ~MD_MARK_RESOLVED; } } + } + + return 0; +} + +/* Resolve bracket pairs as links, wiki links, or (in a 2nd pass) footnotes. */ +static int +md_resolve_brackets(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines) +{ + int opener_index = ctx->unresolved_link_head; + OFF last_link_beg = 0; + OFF last_link_end = 0; + OFF last_img_beg = 0; + OFF last_img_end = 0; + int ret; + + /* Note we here analyze from inner to outer as the marks are ordered + * by closer->beg. */ + while(opener_index >= 0) { + MD_MARK* opener = &ctx->marks[opener_index]; + int closer_index = opener->next; + MD_MARK* closer = &ctx->marks[closer_index]; + int next_index = opener->prev; + MD_MARK* next_opener; + MD_MARK* next_closer; + + if(opener->ch == _T('D')) { + /* We could have this disabled in previous iterations, and + * processing would be just burning CPU cycles. */ + opener_index = next_index; + continue; + } + + if(next_index >= 0) { + next_opener = &ctx->marks[next_index]; + next_closer = &ctx->marks[next_opener->next]; + } else { + next_opener = NULL; + next_closer = NULL; + } + + /* We can perhaps be an image? */ + if(opener->flags & MD_MARK_BRACKET_CANBEIMAGE) { + opener->ch = _T('!'); + opener->beg--; + } + + /* If nested ("[ [ ] ]"), we need to make sure that: + * - The outer does not end inside of (...) belonging to the inner. + * - The outer cannot be link if the inner is link (but this does not apply to images). */ + if((opener->beg < last_link_beg && closer->end < last_link_end) || + (opener->beg < last_img_beg && closer->end < last_img_end) || + (opener->beg < last_link_end && opener->ch != _T('!'))) + { + opener_index = next_index; + continue; + } + + ret = md_resolve_bracket_footnote(ctx, opener, closer, + &last_link_beg, &last_link_end, &opener_index); + if(ret < 0) + return -1; + if(ret > 0) + continue; + + ret = md_resolve_bracket_wikilink(ctx, lines, n_lines, opener_index, closer_index, + opener, closer, next_opener, next_closer, + &last_link_beg, &last_link_end, &opener_index); + if(ret < 0) + return -1; + if(ret > 0) + continue; + + ret = md_resolve_bracket_link(ctx, lines, n_lines, opener_index, closer_index, + opener, closer, next_opener, next_closer, &next_index, + &last_link_beg, &last_link_end, &last_img_beg, &last_img_end); + if(ret < 0) + return -1; opener_index = next_index; } @@ -3951,6 +4312,27 @@ md_analyze_spoiler(MD_CTX* ctx, int mark_index) md_mark_stack_push(ctx, &PIPE_OPENERS, mark_index); } +static void +md_analyze_highlight(MD_CTX* ctx, int mark_index) +{ + MD_MARK* mark = &ctx->marks[mark_index]; + + /* Only "==" is recognized as a highlight mark. */ + if(mark->end - mark->beg != 2) + return; + + if((mark->flags & MD_MARK_POTENTIAL_CLOSER) && EQUAL_OPENERS.top >= 0) { + int opener_index = EQUAL_OPENERS.top; + + md_pop_openers(ctx, opener_index); + md_resolve_range(ctx, opener_index, mark_index); + return; + } + + if(mark->flags & MD_MARK_POTENTIAL_OPENER) + md_mark_stack_push(ctx, &EQUAL_OPENERS, mark_index); +} + static MD_MARK* md_scan_left_for_resolved_mark(MD_CTX* ctx, MD_MARK* mark_from, OFF off, MD_MARK** p_cursor) { @@ -3995,139 +4377,163 @@ md_scan_right_for_resolved_mark(MD_CTX* ctx, MD_MARK* mark_from, OFF off, MD_MAR return NULL; } +static int +md_analyze_permissive_autolink_segment(MD_CTX* ctx, OFF off, OFF end, OFF* p_end, + int scan_backwards, MD_CHAR component_delim, const MD_CHAR* word_extra, + const MD_CHAR* word_delims, MD_MARK** p_cursor) +{ + int n_components = 0; + int n_open_brackets = 0; + int seen_word_delim = TRUE; + int seen_component_delim = TRUE; + OFF component_beg = off; + + if(word_extra == NULL) + word_extra = _T(""); + if(word_delims == NULL) + word_delims = _T(""); + + while(off != end) { + if(scan_backwards) + off--; + + /* Only accept extra and delimiter characters if they're not part of a + * resolved mark. */ + if(!ISALNUM(off) && !ISWHITESPACE(off)) { + if((!scan_backwards && md_scan_right_for_resolved_mark(ctx, *p_cursor, off, p_cursor) != NULL) || + (scan_backwards && md_scan_left_for_resolved_mark(ctx, *p_cursor, off, p_cursor) != NULL)) { + if(scan_backwards) + off++; + break; + } + } + + /* The autolink can be _inside_ brackets so we disallow unbalanced bracket pairs in the URL. + * (Note the brackets are not allowed in e-mail username, so we happily skip this in that case.) */ + if(!scan_backwards) { + if(CH(off) == _T('(')) { + n_open_brackets++; + } else if(CH(off) == _T(')')) { + if(n_open_brackets <= 0) + break; + n_open_brackets--; + } + } + + if(ISALNUM(off) || ISANYOF(off, word_extra)) { + seen_word_delim = FALSE; + seen_component_delim = FALSE; + } else { + if(seen_word_delim) + break; + + if(ISANYOF(off, word_delims)) { + seen_word_delim = TRUE; + } else if(component_delim != _T('\0') && CH(off) == component_delim) { + if(seen_component_delim) + break; + seen_component_delim = TRUE; + component_beg = off; + n_components++; + } else { + if(scan_backwards) + off++; + break; + } + } + + if(!scan_backwards) + off++; + } + + /* Rollback falsely consumed delimiter. */ + if(seen_word_delim || seen_component_delim) + off = (scan_backwards ? off+1 : off-1); + + if(off != component_beg) + n_components++; + + if(n_open_brackets != 0) + return -1; + + *p_end = off; + return n_components; +} + static void md_analyze_permissive_autolink(MD_CTX* ctx, int mark_index) { - static const struct { - const MD_CHAR start_char; - const MD_CHAR delim_char; - const MD_CHAR* allowed_nonalnum_chars_inside; - const MD_CHAR* allowed_nonalnum_chars_anywhere; - int min_components; - const MD_CHAR optional_end_char; - } URL_MAP[] = { - { _T('\0'), _T('.'), _T(".-_"), _T(""), 2, _T('\0') }, /* host, mandatory */ - { _T('/'), _T('/'), _T("/._"), _T("+-"), 0, _T('/') }, /* path */ - { _T('?'), _T('&'), _T("&.-+_=()"), _T(""), 1, _T('\0') }, /* query */ - { _T('#'), _T('\0'), _T(".-+_") , _T(""), 1, _T('\0') } /* fragment */ - }; - MD_MARK* opener = &ctx->marks[mark_index]; MD_MARK* closer = &ctx->marks[mark_index + 1]; /* The dummy. */ - OFF line_beg = closer->beg; /* md_collect_mark() set this for us */ + OFF line_beg = closer->beg; /* md_collect_mark() sets this for us */ OFF line_end = closer->end; /* ditto */ OFF beg = opener->beg; OFF end = opener->end; MD_MARK* left_cursor = opener; - int left_boundary_ok = FALSE; MD_MARK* right_cursor = opener; - int right_boundary_ok = FALSE; - unsigned i; MD_ASSERT(closer->ch == 'D'); + /* E-mail requires the user name (before '@', i.e. scanning backwards). */ if(opener->ch == '@') { MD_ASSERT(CH(opener->beg) == _T('@')); - - /* Scan backwards for the user name (before '@'). */ - while(beg > line_beg) { - if(ISALNUM(beg-1)) - beg--; - else if(beg >= line_beg+2 && ISALNUM(beg-2) && - ISANYOF(beg-1, _T(".-_+")) && - md_scan_left_for_resolved_mark(ctx, left_cursor, beg-1, &left_cursor) == NULL && - ISALNUM(beg)) - beg--; - else - break; - } - if(beg == opener->beg) /* empty user name */ + if(md_analyze_permissive_autolink_segment(ctx, beg, line_beg, &beg, TRUE, + _T('\0'), NULL, _T(".-_+"), &left_cursor) < 1) return; } /* Verify there's line boundary, whitespace, allowed punctuation or * resolved opener mark just before the suspected autolink. */ - if(beg == line_beg || ISUNICODEWHITESPACEBEFORE(beg) || ISANYOF(beg-1, _T("({["))) { - left_boundary_ok = TRUE; - } else { - MD_MARK* left_mark; + if(beg > line_beg && !ISUNICODEWHITESPACEBEFORE(beg) && !ISANYOF(beg-1, _T("({["))) { + MD_MARK* mark; - left_mark = md_scan_left_for_resolved_mark(ctx, left_cursor, beg-1, &left_cursor); - if(left_mark != NULL && (left_mark->flags & MD_MARK_OPENER)) - left_boundary_ok = TRUE; + mark = md_scan_left_for_resolved_mark(ctx, left_cursor, beg-1, &left_cursor); + if(mark == NULL || !(mark->flags & MD_MARK_OPENER)) + return; } - if(!left_boundary_ok) - return; - for(i = 0; i < SIZEOF_ARRAY(URL_MAP); i++) { - int n_components = 0; - int n_open_brackets = 0; - int component_len = 0; - - if(URL_MAP[i].start_char != _T('\0')) { - if(end >= line_end || CH(end) != URL_MAP[i].start_char) - continue; - if(URL_MAP[i].min_components > 0 && (end+1 >= line_end || !ISALNUM(end+1))) - continue; - end++; - } + /* Scan for hostname segment. Hostname is mandatory and requires at least two + * components delimited with a dot. */ + if(md_analyze_permissive_autolink_segment(ctx, end, line_end, &end, FALSE, + _T('.'), NULL, _T("-_"), &right_cursor) < 2) + return; - while(end < line_end) { - if(ISALNUM(end) || ISANYOF(end, URL_MAP[i].allowed_nonalnum_chars_anywhere)) { - if(n_components == 0) - n_components++; - component_len++; - end++; - } else if(component_len > 0 && CH(end) == URL_MAP[i].delim_char && end+1 < line_end && - (ISALNUM(end+1) || ISANYOF(end+1, URL_MAP[i].allowed_nonalnum_chars_anywhere))) { - n_components++; - component_len = 0; - end++; - } else if(ISANYOF(end, URL_MAP[i].allowed_nonalnum_chars_inside) && - md_scan_right_for_resolved_mark(ctx, right_cursor, end, &right_cursor) == NULL && - ((end > line_beg && (ISALNUM(end-1) || CH(end-1) == _T(')'))) || CH(end) == _T('(')) && - ((end+1 < line_end && (ISALNUM(end+1) || CH(end+1) == _T('('))) || CH(end) == _T(')'))) - { - /* brackets have to be balanced. */ - if(CH(end) == _T('(')) { - n_open_brackets++; - } else if(CH(end) == _T(')')) { - if(n_open_brackets <= 0) - break; - n_open_brackets--; - } + if(opener->ch != '@') { + /* Scan for path segment. */ + if(end < line_end && CH(end) == _T('/')) { + if(md_analyze_permissive_autolink_segment(ctx, end+1, line_end, &end, FALSE, + _T('/'), _T(".+-_~"), NULL, &right_cursor) < 0) + return; - component_len++; + /* Path can also end with additional '/' if its a directory. */ + if(end < line_end && CH(end) == _T('/')) end++; - } else { - break; - } } - if(end < line_end && URL_MAP[i].optional_end_char != _T('\0') && - CH(end) == URL_MAP[i].optional_end_char) - end++; - - if(n_components < URL_MAP[i].min_components || n_open_brackets != 0) - return; + /* Scan for query segment. */ + if(end < line_end && CH(end) == _T('?')) { + if(md_analyze_permissive_autolink_segment(ctx, end+1, line_end, &end, FALSE, + _T('&'), _T("._=()"), _T("+-"), &right_cursor) < 0) + return; + } - if(opener->ch == '@') /* E-mail autolinks wants only the host. */ - break; + /* Scan for fragment segment. */ + if(end < line_end && CH(end) == _T('#')) { + if(md_analyze_permissive_autolink_segment(ctx, end+1, line_end, &end, FALSE, + _T('\0'), NULL, _T(".-+_"), &right_cursor) < 0) + return; + } } /* Verify there's line boundary, whitespace, allowed punctuation or * resolved closer mark just after the suspected autolink. */ - if(end == line_end || ISUNICODEWHITESPACE(end) || ISANYOF(end, _T(")}].!?,;"))) { - right_boundary_ok = TRUE; - } else { - MD_MARK* right_mark; + if(end < line_end && !ISUNICODEWHITESPACE(end) && !ISANYOF(end, _T(")}].!?,;"))) { + MD_MARK* mark; - right_mark = md_scan_right_for_resolved_mark(ctx, right_cursor, end, &right_cursor); - if(right_mark != NULL && (right_mark->flags & MD_MARK_CLOSER)) - right_boundary_ok = TRUE; + mark = md_scan_right_for_resolved_mark(ctx, right_cursor, end, &right_cursor); + if(mark == NULL || !(mark->flags & MD_MARK_CLOSER)) + return; } - if(!right_boundary_ok) - return; /* Success, we are an autolink. */ opener->beg = beg; @@ -4193,6 +4599,7 @@ md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, case ':': /* Pass through. */ case '@': md_analyze_permissive_autolink(ctx, i); break; case '|': md_analyze_spoiler(ctx, i); break; + case '=': md_analyze_highlight(ctx, i); break; } if(mark->flags & MD_MARK_RESOLVED) { @@ -4219,9 +4626,9 @@ md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, int table /* Collect all marks. */ MD_CHECK(md_collect_marks(ctx, lines, n_lines, table_mode)); - /* (1) Links. */ + /* (1) Bracket spans: links, wiki links, footnotes. */ md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("[]!"), NULL); - MD_CHECK(md_resolve_links(ctx, lines, n_lines)); + MD_CHECK(md_resolve_brackets(ctx, lines, n_lines)); BRACKET_OPENERS.top = -1; ctx->unresolved_link_head = -1; ctx->unresolved_link_tail = -1; @@ -4259,6 +4666,8 @@ md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, emph_mark_types[n_emph_mark_types++] = _T('_'); if(ctx->parser.flags & MD_FLAG_LATEXMATHSPANS) emph_mark_types[n_emph_mark_types++] = _T('$'); + if(ctx->parser.flags & MD_FLAG_HIGHLIGHT) + emph_mark_types[n_emph_mark_types++] = _T('='); if(ctx->parser.flags & MD_FLAG_SPOILERS) emph_mark_types[n_emph_mark_types++] = _T('|'); if(ctx->parser.flags & MD_FLAG_SUPERSCRIPTS) @@ -4338,6 +4747,28 @@ md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ targ return ret; } +static int +md_enter_leave_span_footnote_ref(MD_CTX* ctx, unsigned int id, + unsigned int ref_id, const CHAR* label, SZ label_size) +{ + MD_ATTRIBUTE_BUILD label_build = { 0 }; + MD_SPAN_FOOTNOTE_REF_DETAIL det; + int ret = 0; + + memset(&det, 0, sizeof(MD_SPAN_FOOTNOTE_REF_DETAIL)); + det.id = id; + det.ref_id = ref_id; + MD_CHECK(md_build_attribute(ctx, label, label_size, 0, + &det.label, &label_build)); + + MD_ENTER_SPAN(MD_SPAN_FOOTNOTE_REF, &det); + MD_LEAVE_SPAN(MD_SPAN_FOOTNOTE_REF, &det); + +abort: + md_free_attribute(ctx, &label_build); + return ret; +} + /* Render the output, accordingly to the analyzed ctx->marks. */ static int @@ -4464,6 +4895,15 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines) } break; + case '=': + if(mark->end - mark->beg == 2) { + if(mark->flags & MD_MARK_OPENER) + MD_ENTER_SPAN(MD_SPAN_MARK, NULL); + else + MD_LEAVE_SPAN(MD_SPAN_MARK, NULL); + } + break; + case '$': if(mark->flags & MD_MARK_OPENER) { MD_ENTER_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL); @@ -4474,7 +4914,7 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines) } break; - case '[': /* Link, wiki link, image. */ + case '[': /* Footnote reference, link, wiki link, or image. */ case '!': case ']': { @@ -4483,6 +4923,24 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines) const MD_MARK* dest_mark; const MD_MARK* title_mark; + /* Footnote reference: self-contained span, no text emitted. + * Only the opener is ever processed here; the closer mark + * is guaranteed to be skipped by the off-advance below. */ + if(opener->flags & MD_MARK_BRACKET_FOOTNOTEREF) { + MD_MARK* index_mark = (MD_MARK*) opener + 1; + MD_ASSERT(mark->ch != ']'); + MD_ASSERT(index_mark->ch == 'D'); + MD_CHECK(md_enter_leave_span_footnote_ref(ctx, + (unsigned int) index_mark->beg, + (unsigned int) index_mark->end, + STR(opener->end), closer->beg - opener->end)); + /* Redirect the opener's end past the whole [^label] + * so that the post-switch "off = mark->end" skips + * the label text and the closing ]. */ + ((MD_MARK*) mark)->end = closer->end; + break; + } + if ((opener->ch == '[' && closer->ch == ']') && opener->end - opener->beg >= 2 && closer->end - closer->beg >= 2) @@ -4823,6 +5281,7 @@ struct MD_BLOCK_tag { * MD_BLOCK_CODE: Non-zero if fenced, zero if indented. * MD_BLOCK_LI: Task mark character (0 if not task list item, 'x', 'X' or ' '). * MD_BLOCK_TABLE: Column count (as determined by the table underline). + * MD_BLOCK_ADMONITION: Admonition type. */ unsigned data : 16; @@ -4835,8 +5294,10 @@ struct MD_BLOCK_tag { struct MD_CONTAINER_tag { CHAR ch; - unsigned is_loose : 8; - unsigned is_task : 8; + unsigned is_loose : 1; + unsigned is_task : 1; + unsigned is_admonition : 1; + unsigned admonition_type : 3; unsigned start; unsigned mark_indent; unsigned contents_indent; @@ -4960,7 +5421,7 @@ md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DE } static int -md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block) +md_process_leaf_block(MD_CTX* ctx, MD_BLOCK* block) { union { MD_BLOCK_H_DETAIL header; @@ -4973,6 +5434,31 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block) int clean_fence_code_detail = FALSE; int ret = 0; + /* For large tables check the table density: If it's too low, lets suppress + * its interpretation as a table, as a safety measure against quadratic + * output size explosion. See https://github.com/mity/md4c/issues/345 */ + if(block->type == MD_BLOCK_TABLE) { + unsigned n_cols = block->data; + unsigned n_rows = block->n_lines; + + if(n_cols > 32 && n_rows > 4096 / n_cols) { + const MD_LINE* lines = (const MD_LINE*)(block + 1); + SZ table_input_size = n_rows; + unsigned i; + + for(i = 0; i < n_rows; i++) + table_input_size += lines[i].end - lines[i].beg; + + if(table_input_size / n_cols < n_rows / 4) { + /* Number of characters encoding the table in the input is + * lower then 25% of all cells to be generated? */ + MD_LOG("Suppressing too sparse table " + "(see https://github.com/mity/md4c/issues/345)"); + block->type = MD_BLOCK_P; + } + } + } + memset(&det, 0, sizeof(det)); if(ctx->n_containers == 0) @@ -5046,9 +5532,13 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block) return ret; } +static const MD_CHAR* MD_ADMONITION_TAGS[] = { _T("note"), _T("tip"), _T("important"), _T("warning"), _T("caution") }; + static int md_process_all_blocks(MD_CTX* ctx) { + MD_TEXTTYPE adm_substr_types[1] = { MD_TEXT_NORMAL }; + MD_OFFSET adm_substr_offsets[2]; int byte_off = 0; int ret = 0; @@ -5064,6 +5554,7 @@ md_process_all_blocks(MD_CTX* ctx) MD_BLOCK_UL_DETAIL ul; MD_BLOCK_OL_DETAIL ol; MD_BLOCK_LI_DETAIL li; + MD_BLOCK_ADMONITION_DETAIL adm; } det; switch(block->type) { @@ -5074,7 +5565,7 @@ md_process_all_blocks(MD_CTX* ctx) case MD_BLOCK_OL: det.ol.start = block->n_lines; - det.ol.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE; + det.ol.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE; det.ol.mark_delimiter = (CHAR) block->data; break; @@ -5084,6 +5575,16 @@ md_process_all_blocks(MD_CTX* ctx) det.li.task_mark_offset = (OFF) block->n_lines; break; + case MD_BLOCK_ADMONITION: + adm_substr_offsets[0] = 0; + adm_substr_offsets[1] = md_strlen(MD_ADMONITION_TAGS[block->data]); + + det.adm.type.text = MD_ADMONITION_TAGS[block->data]; + det.adm.type.size = adm_substr_offsets[1]; + det.adm.type.substr_types = adm_substr_types; + det.adm.type.substr_offsets = adm_substr_offsets; + break; + default: /* noop */ break; @@ -5093,7 +5594,8 @@ md_process_all_blocks(MD_CTX* ctx) if(block->flags & MD_BLOCK_CONTAINER_CLOSER) { MD_LEAVE_BLOCK(block->type, &det); - if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL || block->type == MD_BLOCK_QUOTE) + if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL || + block->type == MD_BLOCK_QUOTE || block->type == MD_BLOCK_ADMONITION) ctx->n_containers--; } @@ -5101,9 +5603,9 @@ md_process_all_blocks(MD_CTX* ctx) MD_ENTER_BLOCK(block->type, &det); if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL) { - ctx->containers[ctx->n_containers].is_loose = (block->flags & MD_BLOCK_LOOSE_LIST); + ctx->containers[ctx->n_containers].is_loose = (block->flags & MD_BLOCK_LOOSE_LIST) ? TRUE : FALSE; ctx->n_containers++; - } else if(block->type == MD_BLOCK_QUOTE) { + } else if(block->type == MD_BLOCK_QUOTE || block->type == MD_BLOCK_ADMONITION) { /* This causes that any text in a block quote, even if * nested inside a tight list item, is wrapped with *

...

. */ @@ -5215,11 +5717,11 @@ md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line) return 0; } -/* Eat from start of current (textual) block any reference definitions and - * remember them so we can resolve any links referring to them. +/* Eat from start of current (textual) block any reference definitions and/or + * footnote definitions, and remember them. * - * (Reference definitions can only be at start of it as they cannot break - * a paragraph.) + * (Such definitions can only be at start of it as they cannot break a + * paragraph.) */ static int md_consume_link_reference_definitions(MD_CTX* ctx) @@ -5228,27 +5730,35 @@ md_consume_link_reference_definitions(MD_CTX* ctx) MD_SIZE n_lines = ctx->current_block->n_lines; MD_SIZE n = 0; - /* Compute how many lines at the start of the block form one or more - * reference definitions. */ while(n < n_lines) { - int n_link_ref_lines; + int n_consumed = 0; + + /* When footnotes are enabled, try footnote definition first for lines + * starting with [^, so they are not accidentally consumed as link ref + * definitions (which would also match [^label]: url). */ + if((ctx->parser.flags & MD_FLAG_FOOTNOTES) && + lines[n].beg + 1 < ctx->size && + CH(lines[n].beg) == _T('[') && CH(lines[n].beg + 1) == _T('^')) + { + n_consumed = md_is_footnote_definition(ctx, lines + n, n_lines - n); + if(n_consumed < 0) + return -1; + } - n_link_ref_lines = md_is_link_reference_definition(ctx, - lines + n, n_lines - n); - /* Not a reference definition? */ - if(n_link_ref_lines == 0) - break; + if(n_consumed == 0) { + n_consumed = md_is_link_reference_definition(ctx, lines + n, n_lines - n); + if(n_consumed < 0) + return -1; + } - /* We fail if it is the ref. def. but it could not be stored due - * a memory allocation error. */ - if(n_link_ref_lines < 0) - return -1; + if(n_consumed == 0) + break; - n += n_link_ref_lines; + n += n_consumed; } - /* If there was at least one reference definition, we need to remove - * its lines from the block, or perhaps even the whole block. */ + /* If there was at least one definition, we need to remove its lines from + * the block, or perhaps even the whole block. */ if(n > 0) { if(n == n_lines) { /* Remove complete block. */ @@ -5469,10 +5979,6 @@ md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count) off++; col_count++; - if(col_count > TABLE_MAXCOLCOUNT) { - MD_LOG("Suppressing table (column_count >" STRINGIZE(TABLE_MAXCOLCOUNT) ")"); - return FALSE; - } /* Pipe delimiter (optional at the end of line). */ while(off < ctx->size && ISWHITESPACE(off)) @@ -5573,7 +6079,7 @@ struct TAG_tag { #ifdef X #undef X #endif -#define X(name) { _T(name), (sizeof(name)-1) / sizeof(CHAR) } +#define X(name) { _T(name), (sizeof(_T(name))-1) / sizeof(CHAR) } #define Xend { NULL, 0 } static const TAG t1[] = { X("pre"), X("script"), X("style"), X("textarea"), Xend }; @@ -5840,7 +6346,9 @@ md_enter_child_containers(MD_CTX* ctx, int n_children) break; case _T('>'): - MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER)); + MD_CHECK(md_push_container_bytes(ctx, + (c->is_admonition ? MD_BLOCK_ADMONITION : MD_BLOCK_QUOTE), + 0, c->admonition_type, MD_BLOCK_CONTAINER_OPENER)); break; default: @@ -5880,8 +6388,9 @@ md_leave_child_containers(MD_CTX* ctx, int n_keep) break; case _T('>'): - MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, - 0, MD_BLOCK_CONTAINER_CLOSER)); + MD_CHECK(md_push_container_bytes(ctx, + (c->is_admonition ? MD_BLOCK_ADMONITION : MD_BLOCK_QUOTE), + 0, c->admonition_type, MD_BLOCK_CONTAINER_CLOSER)); break; default: @@ -6436,8 +6945,30 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, ctx->containers[n_parents].task_mark_off = container.task_mark_off; } - if(n_children > 0) + if(n_children > 0) { + /* Check for admonition tag. */ + if((ctx->parser.flags & MD_FLAG_ADMONITIONS) && n_children > 0 && + ctx->containers[ctx->n_containers-1].ch == _T('>') && line->type == MD_LINE_TEXT && + 3 < line->end - line->beg && line->end - line->beg < 16 && + CH(line->beg) == _T('[') && CH(line->beg+1) == _T('!') && CH(line->end-1) == _T(']')) + { + unsigned i; + + for(i = 0; i < SIZEOF_ARRAY(MD_ADMONITION_TAGS); i++) { + if(line->end - line->beg == md_strlen(MD_ADMONITION_TAGS[i]) + 3 && + md_ascii_case_eq(STR(line->beg+2), MD_ADMONITION_TAGS[i], line->end - line->beg - 3)) + { + ctx->containers[ctx->n_containers-1].is_admonition = TRUE; + ctx->containers[ctx->n_containers-1].admonition_type = i; + line->type = MD_LINE_BLANK; + break; + } + } + } + + /* Enter all the child container blocks. */ MD_CHECK(md_enter_child_containers(ctx, n_children)); + } abort: return ret; @@ -6519,6 +7050,74 @@ md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANAL return ret; } +static int +md_footnote_def_cmp_index(const void* a, const void* b) +{ + const MD_FOOTNOTE_DEF* da = (const MD_FOOTNOTE_DEF*) a; + const MD_FOOTNOTE_DEF* db = (const MD_FOOTNOTE_DEF*) b; + + /* Unreferenced defs (index == 0) always sort after referenced ones. */ + if(da->index == 0 && db->index == 0) return 0; + if(da->index == 0) return +1; + if(db->index == 0) return -1; + if(da->index < db->index) return -1; + if(da->index > db->index) return +1; + return 0; +} + +static int +md_process_footnote_def(MD_CTX* ctx, MD_FOOTNOTE_DEF* def) +{ + MD_BLOCK_FOOTNOTE_DEF_DETAIL det; + MD_ATTRIBUTE_BUILD label_build = { 0 }; + int ret = 0; + + memset(&det, 0, sizeof(MD_BLOCK_FOOTNOTE_DEF_DETAIL)); + det.id = def->index; + det.ref_count = def->ref_count; + MD_CHECK(md_build_attribute(ctx, def->entry.label, def->entry.label_size, 0, + &det.label, &label_build)); + + MD_ENTER_BLOCK(MD_BLOCK_FOOTNOTE_DEF, &det); + MD_CHECK(md_process_normal_block_contents(ctx, def->content_lines, + def->n_content_lines)); + MD_LEAVE_BLOCK(MD_BLOCK_FOOTNOTE_DEF, &det); + +abort: + md_free_attribute(ctx, &label_build); + return ret; +} + +/* Render footnote definitions that were actually referenced, in reference order. + * Called from md_process_doc() after md_process_all_blocks(). */ +static int +md_process_footnote_defs(MD_CTX* ctx) +{ + unsigned i; + int ret = 0; + + if(ctx->footnote_hashtable.n_defs == 0 || ctx->next_footnote_index == 0) + return 0; + + /* Sort defs by index so we emit in reference order. */ + qsort(ctx->footnote_hashtable.defs, ctx->footnote_hashtable.n_defs, + sizeof(MD_FOOTNOTE_DEF), md_footnote_def_cmp_index); + + MD_ENTER_BLOCK(MD_BLOCK_FOOTNOTE_DEF_SECTION, NULL); + + for(i = 0; i < ctx->footnote_hashtable.n_defs; i++) { + MD_FOOTNOTE_DEF* def = &ctx->footnote_hashtable.footnote_defs[i]; + if(def->index == 0) + break; + MD_CHECK(md_process_footnote_def(ctx, def)); + } + + MD_LEAVE_BLOCK(MD_BLOCK_FOOTNOTE_DEF_SECTION, NULL); + +abort: + return ret; +} + static int md_process_doc(MD_CTX *ctx) { @@ -6541,11 +7140,17 @@ md_process_doc(MD_CTX *ctx) md_end_current_block(ctx); MD_CHECK(md_build_ref_def_hashtable(ctx)); + if(ctx->parser.flags & MD_FLAG_FOOTNOTES) + MD_CHECK(md_build_footnote_def_hashtable(ctx)); /* Process all blocks. */ MD_CHECK(md_leave_child_containers(ctx, 0)); MD_CHECK(md_process_all_blocks(ctx)); + /* Emit footnote definitions that were referenced, in reference order. */ + if(ctx->parser.flags & MD_FLAG_FOOTNOTES) + MD_CHECK(md_process_footnote_defs(ctx)); + MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL); abort: @@ -6602,7 +7207,9 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4; md_build_mark_char_map(&ctx); ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1])); + ctx.ref_def_hashtable.def_size = sizeof(MD_REF_DEF); ctx.max_ref_def_output = 16 * MIN(size, (MD_SIZE)(1024 * 1024 / 16)); + ctx.footnote_hashtable.def_size = sizeof(MD_FOOTNOTE_DEF); /* Reset all mark stacks and lists. */ for(i = 0; i < (int) SIZEOF_ARRAY(ctx.opener_stacks); i++) @@ -6618,7 +7225,7 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd /* Clean-up. */ md_free_ref_defs(&ctx); - md_free_ref_def_hashtable(&ctx); + md_free_footnote_defs(&ctx); free(ctx.buffer); free(ctx.marks); free(ctx.block_bytes); diff --git a/cpp/md4c/md4c.h b/cpp/md4c/md4c.h index 194a8d28..e920bc4d 100644 --- a/cpp/md4c/md4c.h +++ b/cpp/md4c/md4c.h @@ -1,6 +1,6 @@ /* * MD4C: Markdown parser for C - * (http://github.com/mity/md4c) + * (https://github.com/mity/md4c) * * Copyright (c) 2016-2026 Martin Mitáš * @@ -98,7 +98,26 @@ typedef enum MD_BLOCKTYPE { MD_BLOCK_TBODY, MD_BLOCK_TR, MD_BLOCK_TH, - MD_BLOCK_TD + MD_BLOCK_TD, + + /* Container for all referenced footnote definitions, rendered at the end + * of the document. + * Detail: NULL. + * Note: Used only if extension MD_FLAG_FOOTNOTES is enabled, and only when + * at least one footnote definition is referenced. */ + MD_BLOCK_FOOTNOTE_DEF_SECTION, + + /* A single footnote definition, rendered at the end of the document. + * Detail: Structure MD_BLOCK_FOOTNOTE_DEF_DETAIL. + * Note: Used only if extension MD_FLAG_FOOTNOTES is enabled. + * Only definitions that are actually referenced in the document are + * emitted, in order of first reference. */ + MD_BLOCK_FOOTNOTE_DEF, + + /* Adminition extension. + * Detail MD_BLOCK_ADMONITION_DETAIL. + * Note: Recognized only when MD_FLAG_ADMONITIONS is enabled. */ + MD_BLOCK_ADMONITION } MD_BLOCKTYPE; /* Span represents an in-line piece of a document which should be rendered with @@ -159,7 +178,19 @@ typedef enum MD_SPANTYPE { /* ... * Syntax: ~subscript~ * Note: Recognized only when MD_FLAG_SUBSCRIPTS is enabled. */ - MD_SPAN_SUBSCRIPT + MD_SPAN_SUBSCRIPT, + + /* Footnote reference, e.g. [^1] or [^note]. + * Syntax: [^label] + * Note: Recognized only when MD_FLAG_FOOTNOTES is enabled. + * The span is self-contained: no MD_TEXT callbacks fire between enter and + * leave. All needed information is in MD_SPAN_FOOTNOTE_REF_DETAIL. */ + MD_SPAN_FOOTNOTE_REF, + + /* ... + * Syntax: ==highlight== + * Note: Recognized only when MD_FLAG_HIGHLIGHT is enabled. */ + MD_SPAN_MARK } MD_SPANTYPE; /* Text is the actual textual contents of span. */ @@ -291,6 +322,11 @@ typedef struct MD_BLOCK_TD_DETAIL { MD_ALIGN align; } MD_BLOCK_TD_DETAIL; +/* Detailed info for MD_BLOCK_ADMONITION. */ +typedef struct MD_BLOCK_ADMONITION_DETAIL { + MD_ATTRIBUTE type; /* One of "note", "tip", "important", "warning", "caution" */ +} MD_BLOCK_ADMONITION_DETAIL; + /* Detailed info for MD_SPAN_A. */ typedef struct MD_SPAN_A_DETAIL { MD_ATTRIBUTE href; @@ -309,23 +345,37 @@ typedef struct MD_SPAN_WIKILINK { MD_ATTRIBUTE target; } MD_SPAN_WIKILINK_DETAIL; +/* Detailed info for MD_SPAN_FOOTNOTE_REF. */ +typedef struct MD_SPAN_FOOTNOTE_REF_DETAIL { + unsigned int id; /* 1-based identifier of the referenced footnote */ + unsigned int ref_id; /* 1-based identifier of this reference among references to the same footnote */ + MD_ATTRIBUTE label; /* Raw label text, e.g. "1" or "note" */ +} MD_SPAN_FOOTNOTE_REF_DETAIL; + +/* Detailed info for MD_BLOCK_FOOTNOTE_DEF. */ +typedef struct MD_BLOCK_FOOTNOTE_DEF_DETAIL { + unsigned int id; /* 1-based identifier of this footnote */ + unsigned int ref_count; /* Number of references to this footnote */ + MD_ATTRIBUTE label; /* Raw label text */ +} MD_BLOCK_FOOTNOTE_DEF_DETAIL; + /* Flags specifying extensions/deviations from CommonMark specification. * * By default (when MD_PARSER::flags == 0), we follow CommonMark specification. * The following flags may allow some extensions or deviations from it. */ -#define MD_FLAG_COLLAPSEWHITESPACE 0x0001 /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */ -#define MD_FLAG_PERMISSIVEATXHEADERS 0x0002 /* Do not require space in ATX headers ( ###header ) */ -#define MD_FLAG_PERMISSIVEURLAUTOLINKS 0x0004 /* Recognize URLs as autolinks even without '<', '>' */ -#define MD_FLAG_PERMISSIVEEMAILAUTOLINKS 0x0008 /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */ -#define MD_FLAG_NOINDENTEDCODEBLOCKS 0x0010 /* Disable indented code blocks. (Only fenced code works.) */ -#define MD_FLAG_NOHTMLBLOCKS 0x0020 /* Disable raw HTML blocks. */ -#define MD_FLAG_NOHTMLSPANS 0x0040 /* Disable raw HTML (inline). */ -#define MD_FLAG_TABLES 0x0100 /* Enable tables extension. */ -#define MD_FLAG_STRIKETHROUGH 0x0200 /* Enable strikethrough extension. */ +#define MD_FLAG_COLLAPSEWHITESPACE 0x1 /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */ +#define MD_FLAG_PERMISSIVEATXHEADERS 0x2 /* Do not require space in ATX headers ( ###header ) */ +#define MD_FLAG_PERMISSIVEURLAUTOLINKS 0x4 /* Recognize URLs as autolinks even without '<', '>' */ +#define MD_FLAG_PERMISSIVEEMAILAUTOLINKS 0x8 /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */ +#define MD_FLAG_NOINDENTEDCODEBLOCKS 0x10 /* Disable indented code blocks. (Only fenced code works.) */ +#define MD_FLAG_NOHTMLBLOCKS 0x20 /* Disable raw HTML blocks. */ +#define MD_FLAG_NOHTMLSPANS 0x40 /* Disable raw HTML (inline). */ +#define MD_FLAG_TABLES 0x100 /* Enable tables extension. */ +#define MD_FLAG_STRIKETHROUGH 0x200 /* Enable strikethrough extension. */ #define MD_FLAG_PERMISSIVEWWWAUTOLINKS \ - 0x0400 /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */ -#define MD_FLAG_TASKLISTS 0x0800 /* Enable task list extension. */ + 0x400 /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */ +#define MD_FLAG_TASKLISTS 0x800 /* Enable task list extension. */ #define MD_FLAG_LATEXMATHSPANS 0x1000 /* Enable $ and $$ containing LaTeX equations. */ #define MD_FLAG_WIKILINKS 0x2000 /* Enable wiki links extension. */ #define MD_FLAG_UNDERLINE 0x4000 /* Enable underline extension (and disables '_' for normal emphasis). */ @@ -333,6 +383,9 @@ typedef struct MD_SPAN_WIKILINK { #define MD_FLAG_SPOILERS 0x10000 /* Enable ||hidden text|| spoiler spans. */ #define MD_FLAG_SUPERSCRIPTS 0x20000 /* Enable ^superscript^ spans. */ #define MD_FLAG_SUBSCRIPTS 0x40000 /* Enable ~subscript~ spans. */ +#define MD_FLAG_ADMONITIONS 0x80000 /* Enable admonitions extension. */ +#define MD_FLAG_FOOTNOTES 0x100000 /* Enable [^label] footnote references. */ +#define MD_FLAG_HIGHLIGHT 0x200000 /* Enable ==highlight== spans. */ #define MD_FLAG_PERMISSIVEAUTOLINKS \ (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS) @@ -348,7 +401,9 @@ typedef struct MD_SPAN_WIKILINK { * extensions, bringing the dialect closer to the original, are implemented. */ #define MD_DIALECT_COMMONMARK 0 -#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS) +#define MD_DIALECT_GITHUB \ + (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS | MD_FLAG_ADMONITIONS | \ + MD_FLAG_FOOTNOTES) /* Parser structure. */