diff options
Diffstat (limited to 'grammar.js')
| -rw-r--r-- | grammar.js | 70 |
1 files changed, 68 insertions, 2 deletions
@@ -33,10 +33,12 @@ module.exports = grammar({ _toplevel: $ => choice( $._blank_line, - // Header elements + // Header elements (yaml dialect, sisudoc-spine) $.version_comment, $.header_field, $.header_comment, + // Header elements (bespoke dialect, original SiSU) + $.sisu_header_field, // Structural headings $.heading, // Block elements @@ -70,8 +72,15 @@ module.exports = grammar({ // ================================================================= // Version comment - first line indicator + // + // Banner forms observed in the wild span both dialects: + // # SiSU 8.0 # SiSUspine 8.0 # SiSU master 8.0 + // % SiSU 4.0.0 % SiSU 0.72 % SiSU 7.1 + // # SiSU: http://... % SiSU markup for 0.16 and later + // The prefix is % (sisu, original) or # (sisudoc-spine, yaml-era). + // Trailing content is free-form (version number, URL, prose). // ================================================================= - version_comment: $ => token(prec(20, /# SiSU(spine)? [0-9]+(\.[0-9]+)*\n/)), + version_comment: $ => token(prec(20, /[#%] SiSU[^\n]*\n/)), // ================================================================= // Header section (YAML-like metadata before :A~) @@ -108,6 +117,63 @@ module.exports = grammar({ header_continuation: $ => token(prec(15, / [^\n]+\n/)), // ================================================================= + // Bespoke header section (original SiSU @key: / :subkey: form) + // + // Distinct from the yaml header above: + // @creator: <- sisu_header_key (starts with '@') + // :author: Carroll, Lewis <- sisu_header_continuation (1-space indent) + // vs the yaml form: + // creator: <- header_key + // author: "Carroll" <- header_continuation (2-space indent) + // + // The two are textually disjoint so they can coexist at the document + // top level without ambiguity. 
Mixing them inside a single document is + // not prevented by the grammar; the markup remains parseable, but + // linters (sisu / spine themselves, or an editor diagnostic) are the + // right place to flag dialect mixing. + // ================================================================= + sisu_header_field: $ => prec(15, seq( + field('key', $.sisu_header_key), + optional(field('value', $.sisu_header_value)), + '\n', + repeat($.sisu_header_continuation), + )), + + // Whitelist of top-level @keys observed across the sisu sample + // corpus. Restricted to avoid false matches on body @-references + // (which lack the trailing colon anyway, e.g. ':A~ @title @creator'). + sisu_header_key: $ => token(prec(15, choice( + '@title:', + '@creator:', + '@date:', + '@rights:', + '@classify:', + '@identifier:', + '@original:', + '@notes:', + '@links:', + '@links:+', // additive variant seen in samples + '@make:', + '@publisher:', + '@language:', + '@vocabulary:', + ))), + + sisu_header_value: $ => /[ \t]+[^\n]*/, + + // Continuation line: any indented (1+ leading space) line whose first + // non-space character is not a newline. Covers: + // :author: Carroll, Lewis (1-space sub-key form) + // { Wikipedia }http://... (1-space freeform under @links:) + // topic_register: SiSU sample:book; (1-space sub-key form) + // continuation of long value (3-space wrap-line under sub-key) + // The 2-space case overlaps with the yaml header_continuation token, + // but disambiguation is contextual: the LR state inside a yaml + // header_field expects header_continuation, and the state inside a + // sisu_header_field expects sisu_header_continuation. + sisu_header_continuation: $ => token(prec(15, / +[^ \n][^\n]*\n/)), + + // ================================================================= // Headings // ================================================================= heading: $ => prec(18, choice( |
