summaryrefslogtreecommitdiffhomepage
path: root/grammar.js
diff options
context:
space:
mode:
Diffstat (limited to 'grammar.js')
-rw-r--r--grammar.js70
1 files changed, 68 insertions, 2 deletions
diff --git a/grammar.js b/grammar.js
index ffaadc2..aa94aad 100644
--- a/grammar.js
+++ b/grammar.js
@@ -33,10 +33,12 @@ module.exports = grammar({
_toplevel: $ => choice(
$._blank_line,
- // Header elements
+ // Header elements (yaml dialect, sisudoc-spine)
$.version_comment,
$.header_field,
$.header_comment,
+ // Header elements (bespoke dialect, original SiSU)
+ $.sisu_header_field,
// Structural headings
$.heading,
// Block elements
@@ -70,8 +72,15 @@ module.exports = grammar({
// =================================================================
// Version comment - first line indicator
+ //
+ // Banner forms observed in the wild span both dialects:
+ // # SiSU 8.0 # SiSUspine 8.0 # SiSU master 8.0
+ // % SiSU 4.0.0 % SiSU 0.72 % SiSU 7.1
+ // # SiSU: http://... % SiSU markup for 0.16 and later
+ // The prefix is % (sisu, original) or # (sisudoc-spine, yaml-era).
+ // Trailing content is free-form (version number, URL, prose).
// =================================================================
- version_comment: $ => token(prec(20, /# SiSU(spine)? [0-9]+(\.[0-9]+)*\n/)),
+ version_comment: $ => token(prec(20, /[#%] SiSU[^\n]*\n/)),
// =================================================================
// Header section (YAML-like metadata before :A~)
@@ -108,6 +117,63 @@ module.exports = grammar({
header_continuation: $ => token(prec(15, / [^\n]+\n/)),
// =================================================================
+ // Bespoke header section (original SiSU @key: / :subkey: form)
+ //
+ // Distinct from the yaml header above:
+ // @creator: <- sisu_header_key (starts with '@')
+ // :author: Carroll, Lewis <- sisu_header_continuation (1-space indent)
+ // vs the yaml form:
+ // creator: <- header_key
+ // author: "Carroll" <- header_continuation (2-space indent)
+ //
+ // The two are textually disjoint, so they can coexist at the document
+ // top level without ambiguity. The grammar does not forbid mixing them
+ // inside a single document; mixed markup remains parseable, and
+ // linters (sisu / spine themselves, or an editor diagnostic) are the
+ // right place to flag dialect mixing.
+ // =================================================================
+ sisu_header_field: $ => prec(15, seq(
+ field('key', $.sisu_header_key), // whitelisted '@key:' token
+ optional(field('value', $.sisu_header_value)), // rest of the key line, when present
+ '\n', // ends the key line
+ repeat($.sisu_header_continuation), // zero or more indented follow-on lines
+ )),
+
+ // Whitelist of top-level @keys observed across the sisu sample
+ // corpus. Restricted to avoid false matches on body @-references
+ // (which lack the trailing colon anyway, e.g. ':A~ @title @creator').
+ sisu_header_key: $ => token(prec(15, choice( // each key is lexed as one token, '@' and ':' included
+ '@title:',
+ '@creator:',
+ '@date:',
+ '@rights:',
+ '@classify:',
+ '@identifier:',
+ '@original:',
+ '@notes:',
+ '@links:',
+ '@links:+', // additive variant seen in samples
+ '@make:',
+ '@publisher:',
+ '@language:',
+ '@vocabulary:',
+ ))),
+
+ sisu_header_value: $ => /[ \t]+[^\n]*/, // leading spaces/tabs are captured as part of the value; the newline is not
+
+ // Continuation line: any line indented by one or more spaces that has
+ // at least one non-space character (i.e. is not blank). Covers:
+ // :author: Carroll, Lewis (1-space sub-key form)
+ // { Wikipedia }http://... (1-space freeform under @links:)
+ // topic_register: SiSU sample:book; (1-space sub-key form)
+ // continuation of long value (3-space wrap-line under sub-key)
+ // The 2-space case overlaps with the yaml header_continuation token,
+ // but disambiguation is contextual: the LR state inside a yaml
+ // header_field expects header_continuation, and the state inside a
+ // sisu_header_field expects sisu_header_continuation.
+ sisu_header_continuation: $ => token(prec(15, / +[^ \n][^\n]*\n/)), // the token includes the line's terminating newline
+
+ // =================================================================
// Headings
// =================================================================
heading: $ => prec(18, choice(