0.5.0 per object inline munging (rather than by line)

- munging per object for inline markup fontface, links, urls, images, footnotes/endnotes (also book index)
author: Ralph Amissah <ralph.amissah@gmail.com> 2019-02-24 13:58:14 -0500
committer: Ralph Amissah <ralph.amissah@gmail.com> 2019-05-17 16:59:38 -0400
commit: f2d16036e4dd6ae22123b52712e051d8e8ba553c (patch)
tree: 70ee530750816f7f28cf6aecf40f81928a9e6df5 /src/doc_reform
parent: metaverse single line munging tuning (diff)
2 files changed, 102 insertions, 80 deletions
diff --git a/src/doc_reform/meta/metadoc_from_src.d b/src/doc_reform/meta/metadoc_from_src.d
index 470d579..38370db 100644
--- a/src/doc_reform/meta/metadoc_from_src.d
+++ b/src/doc_reform/meta/metadoc_from_src.d
@@ -243,6 +243,46 @@ template DocReformDocAbstraction() {
     }
     return line;
   }
+  static auto links_and_images(L)(L obj_txt) { // rewrites inline link markup in obj_txt with the mkup link/url delimiters and returns the resulting text
+    static auto rgx = Rgx();
+    static auto mkup = InlineMarkup();
+    if (obj_txt.match(rgx.smid_inline_url_generic)) { // text with no generic url match is returned unchanged
+      if (
+        obj_txt.match(rgx.smid_inline_link_endnote_url_helper)
+        || obj_txt.match(rgx.smid_inline_link_endnote_url_helper_punctuated)
+      ) { // endnote url helper present; source form is {~^ text }url
+        obj_txt = replaceAll!(m => format("%s%s%s%s%s%s%s %s%s%s%s%s%s %s%s", // punctuated variant is replaced before the plain one
+          mkup.lnk_o, m[1].strip, mkup.lnk_c, // link text
+          mkup.url_o, m[2], mkup.url_c, // link target
+          mkup.en_a_o, // open the generated endnote
+          mkup.lnk_o, m[2].strip, mkup.lnk_c, // inside the endnote the url itself is the link text
+          mkup.url_o, m[2], mkup.url_c,
+          mkup.en_a_c, // close the generated endnote
+          m[3] // third capture of the punctuated pattern is appended after the endnote
+        ))(obj_txt, rgx.smid_inline_link_endnote_url_helper_punctuated);
+        obj_txt = replaceAll!(m => format("%s%s%s%s%s%s%s %s%s%s%s%s%s %s", // same output as above, without a trailing capture
+          mkup.lnk_o, m[1].strip, mkup.lnk_c,
+          mkup.url_o, m[2], mkup.url_c,
+          mkup.en_a_o,
+          mkup.lnk_o, m[2].strip, mkup.lnk_c,
+          mkup.url_o, m[2], mkup.url_c,
+          mkup.en_a_c
+        ))(obj_txt, rgx.smid_inline_link_endnote_url_helper);
+    } else { // NOTE(review): regular link markup is skipped whenever a helper matched anywhere in obj_txt - confirm this is intended for objects mixing both forms
+        obj_txt = replaceAll!(m => format("%s%s%s%s%s%s%s",
+          m[1], // text captured ahead of the link is kept as is
+          mkup.lnk_o, m[2].strip, mkup.lnk_c, // link text
+          mkup.url_o, m[3], mkup.url_c // link target
+        ))(obj_txt, rgx.smid_inline_link_markup_regular);
+      }
+        obj_txt = replaceAll!(m => format("%s%s%s%s%s%s%s", // despite the indentation this runs after either branch above
+          m[1],
+          mkup.lnk_o, m[2].strip, mkup.lnk_c, // the url doubles as link text
+          mkup.url_o, m[2], mkup.url_c
+        ))(obj_txt, rgx.smid_inline_link_naked_url); // naked url: no link text given in the source markup
+    }
+    return obj_txt;
+  }
   /+ book index variables +/
   string book_idx_tmp;
   string[][string][string] bookindex_unordered_hashes;
@@ -459,7 +499,13 @@ template DocReformDocAbstraction() {
            (includes regular text paragraph, headings & blocks other than code) +/
         /+ heading, glossary, blurb, poem, group, block, quote, table +/
         line = line.inline_markup_faces; // by text line (rather than by text object), linebreaks in para problematic
-        if (line.matchFirst(rgx.heading_biblio)
+        if ((line.matchFirst(rgx.heading_biblio)
+          && obj_type_status["para"]  != State.on
+          && obj_type_status["group"] != State.on
+          && obj_type_status["block"] != State.on
+          && obj_type_status["poem"]  != State.on
+          && obj_type_status["table"] != State.on
+          && obj_type_status["quote"] != State.on)
         || (obj_type_status["biblio_section"] == State.on
         && (!(line.matchFirst(rgx.heading_blurb_glossary)))
         && (!(line.matchFirst(rgx.heading)))
@@ -476,7 +522,13 @@ template DocReformDocAbstraction() {
             }
           }
           continue;
-        } else if (line.matchFirst(rgx.heading_glossary)
+        } else if ((line.matchFirst(rgx.heading_glossary)
+          && obj_type_status["para"]  != State.on
+          && obj_type_status["group"] != State.on
+          && obj_type_status["block"] != State.on
+          && obj_type_status["poem"]  != State.on
+          && obj_type_status["table"] != State.on
+          && obj_type_status["quote"] != State.on)
         || (obj_type_status["glossary_section"] == State.on
         && (!(line.matchFirst(rgx.heading_biblio_blurb)))
         && (!(line.matchFirst(rgx.heading)))
@@ -564,7 +616,13 @@ template DocReformDocAbstraction() {
             obj_type_status["ocn_status"] = OCNstatus.on;
           }
           continue;
-        } else if (line.matchFirst(rgx.heading_blurb)
+        } else if ((line.matchFirst(rgx.heading_blurb)
+          && obj_type_status["para"]  != State.on
+          && obj_type_status["group"] != State.on
+          && obj_type_status["block"] != State.on
+          && obj_type_status["poem"]  != State.on
+          && obj_type_status["table"] != State.on
+          && obj_type_status["quote"] != State.on)
         || (obj_type_status["blurb_section"] == State.on
         && (!(line.matchFirst(rgx.heading_biblio_glossary)))
         && (!(line.matchFirst(rgx.heading)))
@@ -690,7 +748,7 @@ template DocReformDocAbstraction() {
               comp_obj_para.metainfo.is_of_section        = "blurb";
               comp_obj_para.metainfo.is_of_type           = "para";
               comp_obj_para.metainfo.is_a                 = "blurb";
-              comp_obj_para.text                          = munge.url_links(line.to!string.strip).replaceFirst(rgx.para_attribs, "");
+              comp_obj_para.text                          = links_and_images(line.to!string.strip).replaceFirst(rgx.para_attribs, "");
               comp_obj_para.metainfo.ocn                  = 0;
               comp_obj_para.metainfo.identifier           = "";
               comp_obj_para.metainfo.object_number_off    = true;
@@ -1086,7 +1144,6 @@ template DocReformDocAbstraction() {
             obj_type_status["glossary_section"] = State.off;
             obj_type_status["blurb_section"]    = State.off;
           }
-          the_document_body_section[$-1].text = (the_document_body_section[$-1].text).inline_markup_faces;
           if (the_document_body_section[$-1].metainfo.is_a == "verse") {
             /+ scan for endnotes for whole poem (each verse in poem) +/
             foreach (i; previous_length .. the_document_body_section.length) {
@@ -1321,7 +1378,7 @@ template DocReformDocAbstraction() {
         "Endnotes",
         "endnotes",
       );
-      toc_txt_= munge.url_links(toc_txt_);
+      toc_txt_= toc_txt_.links_and_images;
       comp_obj_toc.text                       = toc_txt_.to!string.strip;
       comp_obj_toc.has.inline_links           = true;
       the_table_of_contents_section           ~= comp_obj_toc;
@@ -1332,7 +1389,7 @@ template DocReformDocAbstraction() {
         "Glossary",
         "glossary",
       );
-      toc_txt_= munge.url_links(toc_txt_);
+      toc_txt_= toc_txt_.links_and_images;
       comp_obj_toc.text                       = toc_txt_.to!string.strip;
       comp_obj_toc.has.inline_links           = true;
       the_table_of_contents_section           ~= comp_obj_toc;
@@ -1343,7 +1400,7 @@ template DocReformDocAbstraction() {
         "Bibliography",
         "bibliography",
       );
-      toc_txt_= munge.url_links(toc_txt_);
+      toc_txt_= toc_txt_.links_and_images;
       comp_obj_toc.text                       = toc_txt_.to!string.strip;
       comp_obj_toc.has.inline_links           = true;
       the_table_of_contents_section           ~= comp_obj_toc;
@@ -1354,7 +1411,7 @@ template DocReformDocAbstraction() {
         "Book Index",
         "bookindex",
       );
-      toc_txt_= munge.url_links(toc_txt_);
+      toc_txt_= toc_txt_.links_and_images;
       comp_obj_toc.text                       = toc_txt_.to!string.strip;
       comp_obj_toc.has.inline_links           = true;
       the_table_of_contents_section           ~= comp_obj_toc;
@@ -1365,7 +1422,7 @@ template DocReformDocAbstraction() {
         "Blurb",
         "blurb",
       );
-      toc_txt_= munge.url_links(toc_txt_);
+      toc_txt_= toc_txt_.links_and_images;
       comp_obj_toc.has.inline_links           = true;
       comp_obj_toc.text                       = toc_txt_.to!string.strip;
       the_table_of_contents_section           ~= comp_obj_toc;
@@ -1730,7 +1787,13 @@ template DocReformDocAbstraction() {
           }
           obj.text = obj.text.replaceFirst(
             rgx.inline_image_without_dimensions,
-            ("$1☼$3,w" ~ _w.to!string ~ "h" ~ _h.to!string ~ " $6")
+            format(q"┋%s☼%s,w%sh%s %s┋",
+              "$1",
+              "$3",
+              _w.to!string,
+              _h.to!string,
+              "$6",
+            )
           );
         }
         debug(images) {
@@ -3109,7 +3172,13 @@ template DocReformDocAbstraction() {
     mixin DocReformBiblio;
     auto jsn = BibJsnStr();
     static auto rgx = Rgx();
-    if (line.matchFirst(rgx.heading_biblio)) {
+    if (line.matchFirst(rgx.heading_biblio)
+        && obj_type_status["para"]  != State.on
+        && obj_type_status["group"] != State.on
+        && obj_type_status["block"] != State.on
+        && obj_type_status["poem"]  != State.on
+        && obj_type_status["table"] != State.on
+        && obj_type_status["quote"] != State.on) {
       obj_type_status["biblio_section"]   = TriState.on;
       obj_type_status["blurb_section"]    = State.off;
       obj_type_status["glossary_section"] = State.off;
@@ -4264,62 +4333,6 @@ template DocReformDocAbstraction() {
       n_foot_sp_asterisk = 0;
       n_foot_sp_plus = 0;
     }
-    static auto url_links(Ot)(Ot obj_txt_in) {
-      debug(asserts) {
-        static assert(is(typeof(obj_txt_in) == string));
-      }
-      /+ url matched +/
-      obj_txt_in = obj_txt_in.replaceAll(rgx.inline_notes_al_special, ""); // TODO reinstate when special footnotes are implemented
-      if (obj_txt_in.match(rgx.smid_inline_url_generic)) {
-        /+ link: naked url: http://url +/
-        if (obj_txt_in.match(rgx.smid_inline_link_naked_url)) {
-          obj_txt_in = obj_txt_in.replaceAll(
-              rgx.smid_inline_link_naked_url,
-              ("$1"
-                ~ mkup.lnk_o ~ "$2" ~ mkup.lnk_c
-                ~  mkup.url_o ~ "$2" ~  mkup.url_c
-              ) // ("$1{ $2 }$2$3")
-            );
-        }
-        /+ link with helper for endnote including the url:
-             {~^ link which includes url as footnote }http://url
-           maps to:
-             { link which includes url as footnote }http://url~{ { http://url }http://url }~
-        +/
-        if (obj_txt_in.match(rgx.smid_inline_link_endnote_url_helper)) {
-          obj_txt_in = obj_txt_in
-            .replaceAll(
-              rgx.smid_inline_link_endnote_url_helper_punctuated,
-              (mkup.lnk_o ~ "$1" ~ mkup.lnk_c
-                ~ mkup.url_o ~ "$2" ~ mkup.url_c
-                ~ "~{ " ~ mkup.lnk_o ~ " $2 " ~ mkup.lnk_c
-                ~ mkup.url_o ~ "$2" ~ mkup.url_c
-                ~  " }~$3") // ("{ $1 }$2~{ { $2 }$2 }~$3")
-            )
-            .replaceAll(
-              rgx.smid_inline_link_endnote_url_helper,
-              (mkup.lnk_o ~ "$1" ~ mkup.lnk_c
-                ~ mkup.url_o ~ "$2" ~ mkup.url_c
-                ~ "~{ " ~ mkup.lnk_o ~ " $2 " ~ mkup.lnk_c
-                ~ mkup.url_o ~ "$2" ~ mkup.url_c
-                ~  " }~") // ("{ $1 }$2~{ { $2 }$2 }~")
-            );
-        }
-        /+ link with regular markup:
-           { linked text or image }http://url
-        +/
-        if (obj_txt_in.match(rgx.smid_inline_link_markup_regular)) {
-          obj_txt_in = obj_txt_in.replaceAll(
-            rgx.smid_inline_link_markup_regular,
-            ("$1"
-              ~ mkup.lnk_o ~ "$2" ~ mkup.lnk_c
-              ~  mkup.url_o ~ "$3" ~  mkup.url_c
-            ) // ("$1{ $2 }$3$4")
-          );
-        }
-      }
-      return obj_txt_in;
-    }
     static auto images(Ot)(Ot obj_txt_in) {
       debug(asserts) {
         static assert(is(typeof(obj_txt_in) == string));
@@ -4342,7 +4355,7 @@ template DocReformDocAbstraction() {
           }
         } else if (obj_txt_in.match(rgx.smid_image)) {
           obj_txt_in = obj_txt_in
-            .replaceAll(rgx.smid_image, ("$1" ~ mkup.img ~ "$2,w0h0 " ~ "$3"))
+            .replaceAll(rgx.smid_image, ("$1" ~ mkup.img ~ "$2,w0h0" ~ "$3"))
             .replaceAll(rgx.smid_image_delimit, ("$1"
               ~ mkup.lnk_o ~ "$2".strip ~ mkup.lnk_c
               ~ mkup.url_o ~ mkup.url_c));
@@ -4451,7 +4464,7 @@ template DocReformDocAbstraction() {
       /+ url matched +/
       if (obj_txt_in.match(rgx.smid_inline_url)) {
         urls = true;
-        obj_txt_in = url_links(obj_txt_in);
+        obj_txt_in = obj_txt_in.links_and_images;
       }
       if (auto m = obj_txt_in.match(rgx.para_inline_link_anchor)) {
         obj_txt_in = obj_txt_in
@@ -4645,6 +4658,15 @@ template DocReformDocAbstraction() {
       obj_notes_and_links["notes_star"]          = false;
       obj_notes_and_links["links"]               = false;
       obj_notes_and_links["image_no_dimensions"] = false;
+      if ((obj_["is"] == "para")
+        || (obj_["is"] == "heading")
+        || (obj_["is"] == "quote")
+        || (obj_["is"] == "group")
+        || (obj_["is"] == "block")
+        || (obj_["is"] == "verse")) {
+        obj_txt["munge"] = (obj_txt["munge"]).inline_markup_faces;
+        obj_txt["munge"] = (obj_txt["munge"]).links_and_images;
+      }
       switch (obj_["is"]) {
       case "heading":
         if (_new_doc) {
@@ -4758,7 +4780,7 @@ template DocReformDocAbstraction() {
           heading_toc_,
           _anchor_tag,
         );
-        toc_txt_= munge.url_links(toc_txt_);
+        toc_txt_= toc_txt_.links_and_images;
         comp_obj_toc                             = comp_obj_toc.init;
         comp_obj_toc.metainfo.is_of_part         = "frontmatter";
         comp_obj_toc.metainfo.is_of_section      = "toc";
@@ -4819,7 +4841,7 @@ template DocReformDocAbstraction() {
           _anchor_tag,
         );
         lev4_subtoc[tag_in_seg["seg_lv4"]]
-        ~= munge.url_links(obj_["lev_markup_number"]
+        ~= links_and_images(obj_["lev_markup_number"]
              ~ "~ " ~ subtoc_txt_.to!string.strip
            );
         break;
@@ -5515,7 +5537,7 @@ template DocReformDocAbstraction() {
             string markup = "";
             if (auto m = locs.matchFirst(rgx.book_index_go)) {
               markup
-                = munge.url_links("{ " ~ m["link"] ~ " }"
+                = links_and_images("{ " ~ m["link"] ~ " }"
                 ~ "#" ~ m["ocn"] ~ ", ");
             } else {
               writeln(__LINE__, ": ", locs);
@@ -5623,12 +5645,12 @@ template DocReformDocAbstraction() {
         // you need anchor for segments at this point ->
         object_notes["anchor"] ~= "note_" ~ m.captures[1] ~ "』";
         object_notes["notes"]  ~= (tag_in_seg["seg_lv4"].empty)
-        ? (munge.url_links(
+        ? (links_and_images(
             "{" ~ mkup.superscript  ~ mkup.ff_o ~ m.captures[1] ~ "." ~ mkup.ff_c  ~ mkup.superscript  ~ "}#noteref_"
             ~ m.captures[1]) ~ " "
             ~ m.captures[2] ~ "』"
           )
-        : (munge.url_links(
+        : (links_and_images(
             "{" ~ mkup.superscript ~ mkup.ff_o ~ m.captures[1] ~ "." ~ mkup.ff_c  ~ mkup.superscript ~ "}"
              ~ mkup.mark_internal_site_lnk
              ~ tag_in_seg["seg_lv4"]
diff --git a/src/doc_reform/meta/rgx.d b/src/doc_reform/meta/rgx.d
index f875ce3..2bdb3ec 100644
--- a/src/doc_reform/meta/rgx.d
+++ b/src/doc_reform/meta/rgx.d
@@ -155,14 +155,14 @@ static template DocReformRgxInit() {
     static smid_inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[.,;:?!]?([ ]|$))`, "mg");
     static smid_inline_link_endnote_url_helper            = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg");
     static image                                           = ctRegex!(`([a-zA-Z0-9._-]+?\.(?:png|gif|jpg))`, "mg");
-    static smid_image                                      = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)\{(?:~\^\s+|\s*))(?P<image>\S+\.(?:png|gif|jpg))\s*(?P<post>(?:.+?)\s*\}(?:image|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
-    static smid_image_generic                              = ctRegex!(`(?:^|[ ]|[^\S]?)\{(?:~\^\s+|\s*)\S+\.(?:png|gif|jpg).+?\}(?:image|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
-    static smid_image_with_dimensions                      = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)\{(?:~\^\s+|\s*))(?P<image>\S+\.(?:png|gif|jpg))\s+(?P<width>\d+)x(?P<height>\d+)\s*(?P<post>(?:.+?)\s*\}(?:image|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
+    static smid_image                                      = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[{┥](?:~\^\s+|\s*))(?P<image>\S+\.(?:png|gif|jpg))(?P<post>(?:.*?)\s*[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg"); // image link: opens with { or ┥, closes with } or ┝, followed by "image", a ┤...├ span, or a url/path
+    static smid_image_generic                              = ctRegex!(`(?:^|[ ]|[^\S]?)[{┥](?:~\^\s+|\s*)\S+\.(?:png|gif|jpg).*?[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); // same shape as smid_image but without named groups
+    static smid_image_with_dimensions                      = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[{┥](?:~\^\s+|\s*))(?P<image>\S+\.(?:png|gif|jpg))\s+(?P<width>\d+)x(?P<height>\d+)\s*(?P<post>(?:.*?)\s*[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg"); // as smid_image, but requires <width>x<height> digits after the image file name
+    static smid_mod_image_without_dimensions               = ctRegex!(`[{┥](?:~\^\s+|\s*)☼\S+\.(?:png|gif|jpg),w0h0.*[}┝](?:image|┤.*?├|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); // ☼-prefixed image still carrying the w0h0 placeholder; url targets here are limited to https?/git
     static smid_image_delimit                              = ctRegex!(`(?P<pre>^|[ ]|[^\S]?)\{\s*(?P<text>.+?)\s*\}(?:image)(?=[;:!,?.]?([ )\]]|$))`, "mg");
-    static smid_mod_image_without_dimensions               = ctRegex!(`[{┥](?:~\^\s+|\s*)☼\S+\.(?:png|gif|jpg),w0h0\s+(?:.+?)\s*[}┝](?:image|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
     /+ inline markup book index +/
     static book_index                                     = ctRegex!(`^=\{\s*(.+?)\}$`, "m");
-    static book_index_open                                = ctRegex!(`^=\{\s*([^}]+?)$`);
+    static book_index_open                                = ctRegex!(`^=\{\s*([^}]*?)$`); // opening ={ with no closing brace in the text matched; the captured remainder may be empty
     static book_index_close                               = ctRegex!(`^(.*?)\}$`, "m");
     /+ no object_number object +/
     static object_number_off                            = ctRegex!(`~#[ ]*$`, "m");
author	Ralph Amissah <ralph.amissah@gmail.com>	2019-02-24 13:58:14 -0500
committer	Ralph Amissah <ralph.amissah@gmail.com>	2019-05-17 16:59:38 -0400
commit	f2d16036e4dd6ae22123b52712e051d8e8ba553c (patch)
tree	70ee530750816f7f28cf6aecf40f81928a9e6df5 /src/doc_reform
parent	metaverse single line munging tuning (diff)