summaryrefslogtreecommitdiffhomepage
path: root/grammar.js
diff options
context:
space:
mode:
author: Ralph Amissah <ralph.amissah@gmail.com> 2026-05-12 23:02:54 -0400
committer: Ralph Amissah <ralph.amissah@gmail.com> 2026-05-12 23:02:54 -0400
commit: d4f99d5be4a9329568911270bed0e64707c1ab7d (patch)
tree: 492d3e1fb675411a692467e327f3ab1730a2a9e0 /grammar.js
parent: docs: add README; adopt git.sisudoc.org/tools/ namespace (diff)
grammar: accept original SiSU bespoke @key: header dialect (HEAD, main)
Until now the grammar handled only the sisudoc-spine YAML header form.
Real SiSU markup uses two textually disjoint header dialects with an
identical body grammar:

  bespoke (original SiSU, ruby):    yaml (sisudoc-spine):
    % SiSU 4.0.0                      # SiSU 8.0
    @title: Alice's Adventures        title: "Alice's Adventures"
    @creator:                         creator:
     :author: Carroll, Lewis            author: "Carroll, Lewis"

Add bespoke-dialect rules alongside the existing yaml ones; the body
grammar is shared between the two.

grammar.js:
- version_comment widened from /# SiSU(spine)? <ver>/ to
  /[#%] SiSU[^\n]*\n/. Real banners observed across both corpora
  include "# SiSU 8.0", "# SiSUspine 8.0", "# SiSU master 8.0",
  "# SiSU: http://...", "% SiSU 4.0.0", "% SiSU 0.72",
  "% SiSU http://...", "% SiSU markup for 0.16 and later".
- Added sisu_header_field / sisu_header_key / sisu_header_value /
  sisu_header_continuation.
- sisu_header_key whitelists the 14 @keys observed in the sisu corpus
  (including the @links:+ additive variant), parallel to the existing
  yaml header_key whitelist.
- sisu_header_continuation accepts any 1+ space indented line whose
  first non-space character is not a newline. Covers " :sub: val",
  " { text }url" freeform under @links:, and 3+ space wrap-line
  continuations (10690 occurrences across the sisu corpus, almost all
  inside @classify: :topic_register: entries).
- Wired sisu_header_field into _toplevel alongside header_field.

queries/highlights.scm:
- Added @keyword / @string captures for the new sisu_* nodes, parallel
  to the existing yaml header captures.

test/corpus/headers_sisu.txt:
- 12 new cases: % SiSU banner variants, @title: with inline value,
  @creator: + :author:, @date: with multiple sub-keys, @make: mixed
  sub-keys, @links: with freeform { text }url continuations, @links:+
  additive, full bespoke header block, and a coexistence case
  confirming yaml + bespoke at the same top level.

README.md and sisu-markup_tree-sitter.md:
- Describe dual-dialect support; add sisu corpus results table.
Test results:
- tree-sitter test: 79 / 79 pass.
- sisu-markup-samples/data/samples/ (full sisu corpus): 44 / 65 parse
  cleanly (was 0 / 65). current/ layout parses at 20 / 21 (95 %); the
  dominant failure mode is the wrapped/ layout (7 / 21) which trips the
  pre-existing one-line-per-paragraph limitation, not the new header
  rules.
- sisudoc-spine-samples/markup/ (full spine corpus): 37 / 46 unchanged.
  No regression in the yaml dialect.

Mixing the two dialects inside one document remains parseable but
non-idiomatic; enforcement is left to a future linter pass rather than
the grammar.

(assisted by Claude-Code)
Diffstat (limited to 'grammar.js')
-rw-r--r-- grammar.js 70
1 files changed, 68 insertions, 2 deletions
diff --git a/grammar.js b/grammar.js
index ffaadc2..aa94aad 100644
--- a/grammar.js
+++ b/grammar.js
@@ -33,10 +33,12 @@ module.exports = grammar({
_toplevel: $ => choice(
$._blank_line,
- // Header elements
+ // Header elements (version banner shared by both dialects; yaml dialect fields, sisudoc-spine)
$.version_comment,
$.header_field,
$.header_comment,
+ // Header elements (bespoke dialect, original SiSU)
+ $.sisu_header_field,
// Structural headings
$.heading,
// Block elements
@@ -70,8 +72,15 @@ module.exports = grammar({
// =================================================================
// Version comment - first line indicator
+ //
+ // Banner forms observed in the wild span both dialects:
+ // # SiSU 8.0 # SiSUspine 8.0 # SiSU master 8.0
+ // % SiSU 4.0.0 % SiSU 0.72 % SiSU 7.1
+ // # SiSU: http://... % SiSU markup for 0.16 and later
+ // The prefix is % (sisu, original) or # (sisudoc-spine, yaml-era).
+ // Trailing content is free-form (version number, URL, prose).
// =================================================================
- version_comment: $ => token(prec(20, /# SiSU(spine)? [0-9]+(\.[0-9]+)*\n/)),
+ version_comment: $ => token(prec(20, /[#%] SiSU[^\n]*\n/)),
// =================================================================
// Header section (YAML-like metadata before :A~)
@@ -108,6 +117,63 @@ module.exports = grammar({
header_continuation: $ => token(prec(15, / [^\n]+\n/)),
// =================================================================
+ // Bespoke header section (original SiSU @key: / :subkey: form)
+ //
+ // Distinct from the yaml header above:
+ // @creator: <- sisu_header_key (starts with '@')
+ // :author: Carroll, Lewis <- sisu_header_continuation (1-space indent)
+ // vs the yaml form:
+ // creator: <- header_key
+ // author: "Carroll" <- header_continuation (2-space indent)
+ //
+ // The two are textually disjoint so they can coexist at the document
+ // top level without ambiguity. Mixing them inside a single document is
+ // not enforced by the grammar; the markup remains parseable, but
+ // linters (sisu / spine themselves, or an editor diagnostic) are the
+ // right place to flag dialect mixing.
+ // =================================================================
+ sisu_header_field: $ => prec(15, seq(
+ field('key', $.sisu_header_key),
+ optional(field('value', $.sisu_header_value)),
+ '\n',
+ repeat($.sisu_header_continuation),
+ )),
+
+ // Whitelist of top-level @keys observed across the sisu sample
+ // corpus. Restricted to avoid false matches on body @-references
+ // (which lack the trailing colon anyway, e.g. ':A~ @title @creator').
+ sisu_header_key: $ => token(prec(15, choice(
+ '@title:',
+ '@creator:',
+ '@date:',
+ '@rights:',
+ '@classify:',
+ '@identifier:',
+ '@original:',
+ '@notes:',
+ '@links:',
+ '@links:+', // additive variant seen in samples
+ '@make:',
+ '@publisher:',
+ '@language:',
+ '@vocabulary:',
+ ))),
+
+ sisu_header_value: $ => /[ \t]+[^\n]*/,
+
+ // Continuation line: any indented (1+ leading space) line whose first
+ // non-space character is not a newline. Covers:
+ // :author: Carroll, Lewis (1-space sub-key form)
+ // { Wikipedia }http://... (1-space freeform under @links:)
+ // :topic_register: SiSU sample:book; (1-space sub-key form)
+ // continuation of long value (3-space wrap-line under sub-key)
+ // The 2-space case overlaps with the yaml header_continuation token,
+ // but disambiguation is contextual: the LR state inside a yaml
+ // header_field expects header_continuation, and the state inside a
+ // sisu_header_field expects sisu_header_continuation.
+ sisu_header_continuation: $ => token(prec(15, / +[^ \n][^\n]*\n/)),
+
+ // =================================================================
// Headings
// =================================================================
heading: $ => prec(18, choice(