about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbeatrix <beatrix.bitrot@gmail.com>2017-10-19 19:29:52 -0400
committerGitHub <noreply@github.com>2017-10-19 19:29:52 -0400
commitb15dd0551431ab66a9c5027e24bafb202cf6e645 (patch)
treefbc6f00a2260cd0528a95f37a6817c08789f580e
parentf5e2469485a632ac55c02214e8b4ff476108d8f4 (diff)
parent21bafc65555d9ce82bf9e3c7841ba69564cc70fd (diff)
Merge pull request #191 from glitch-soc/garglamel-yaml
ƔAML update
-rw-r--r--app/javascript/glitch/util/bio_metadata.js225
1 files changed, 68 insertions, 157 deletions
diff --git a/app/javascript/glitch/util/bio_metadata.js b/app/javascript/glitch/util/bio_metadata.js
index eb3ad01fe..599ec20e2 100644
--- a/app/javascript/glitch/util/bio_metadata.js
+++ b/app/javascript/glitch/util/bio_metadata.js
@@ -69,6 +69,10 @@ functions are:
   easier to read and to maintain. I leave it to the future readers of
   this code to determine the extent of my successes in this endeavor.
 
+  UPDATE 19 Oct 2017: We no longer allow character escapes inside our
+  double-quoted strings for ease of processing. We now internally use
+  the name "ƔAML" in our code to clarify that this is Not Quite YAML.
+
                                        Sending love + warmth eternal,
                                        - kibigo [@kibi@glitch.social]
 
@@ -96,10 +100,7 @@ const ALLOWED_CHAR      =  unirex( //  `c-printable` in the YAML 1.2 spec.
     compat_mode ? '[\t\n\r\x20-\x7e\x85\xa0-\ufffd]' : '[\t\n\r\x20-\x7e\x85\xa0-\ud7ff\ue000-\ufffd\u{10000}-\u{10FFFF}]'
   );
 const WHITE_SPACE       = /[ \t]/;
-const INDENTATION       = / */;  //  Indentation must be only spaces.
 const LINE_BREAK        = /\r?\n|\r|<br\s*\/?>/;
-const ESCAPE_CHAR       = /[0abt\tnvfre "\/\\N_LP]/;
-const HEXADECIMAL_CHARS = /[0-9a-fA-F]/;
 const INDICATOR         = /[-?:,[\]{}&#*!|>'"%@`]/;
 const FLOW_CHAR         = /[,[\]{}]/;
 
@@ -121,7 +122,7 @@ const NEW_LINE          = unirex(
   rexstr(ANY_WHITE_SPACE) + rexstr(LINE_BREAK)
 );
 const SOME_NEW_LINES    = unirex(
-  '(?:' + rexstr(ANY_WHITE_SPACE) + rexstr(LINE_BREAK) + ')+'
+  '(?:' + rexstr(NEW_LINE) + ')+'
 );
 const POSSIBLE_STARTS   = unirex(
   rexstr(DOCUMENT_START) + rexstr(/<p[^<>]*>/) + '?'
@@ -131,22 +132,13 @@ const POSSIBLE_ENDS     = unirex(
   rexstr(DOCUMENT_END) + '|' +
   rexstr(/<\/p>/)
 );
-const CHARACTER_ESCAPE  = unirex(
-  rexstr(/\\/) +
-  '(?:' +
-    rexstr(ESCAPE_CHAR) + '|' +
-    rexstr(/x/) + rexstr(HEXADECIMAL_CHARS) + '{2}' + '|' +
-    rexstr(/u/) + rexstr(HEXADECIMAL_CHARS) + '{4}' + '|' +
-    rexstr(/U/) + rexstr(HEXADECIMAL_CHARS) + '{8}' +
-  ')'
+const QUOTE_CHAR         = unirex(
+  '(?=' + rexstr(NOT_LINE_BREAK) + ')[^"]'
 );
-const ESCAPED_CHAR      = unirex(
-  rexstr(/(?!["\\])/) + rexstr(NOT_LINE_BREAK) + '|' +
-  rexstr(CHARACTER_ESCAPE)
-);
-const ANY_ESCAPED_CHARS = unirex(
-  rexstr(ESCAPED_CHAR) + '*'
+const ANY_QUOTE_CHAR    = unirex(
+  rexstr(QUOTE_CHAR) + '*'
 );
+
 const ESCAPED_APOS      = unirex(
   '(?=' + rexstr(NOT_LINE_BREAK) + ')' + rexstr(/[^']|''/)
 );
@@ -190,120 +182,76 @@ const LATER_VALUE_CHAR  = unirex(
 
 /*  YAML CONSTRUCTS  */
 
-const YAML_START        = unirex(
-  rexstr(ANY_WHITE_SPACE) + rexstr(/---/)
+const ƔAML_START        = unirex(
+  rexstr(ANY_WHITE_SPACE) + '---'
 );
-const YAML_END          = unirex(
-  rexstr(ANY_WHITE_SPACE) + rexstr(/(?:---|\.\.\.)/)
+const ƔAML_END          = unirex(
+  rexstr(ANY_WHITE_SPACE) + '(?:---|\.\.\.)'
 );
-const YAML_LOOKAHEAD    = unirex(
+const ƔAML_LOOKAHEAD    = unirex(
   '(?=' +
-    rexstr(YAML_START) +
+    rexstr(ƔAML_START) +
     rexstr(ANY_ALLOWED_CHARS) + rexstr(NEW_LINE) +
-    rexstr(YAML_END) + rexstr(POSSIBLE_ENDS) +
+    rexstr(ƔAML_END) + rexstr(POSSIBLE_ENDS) +
   ')'
 );
-const YAML_DOUBLE_QUOTE = unirex(
-  rexstr(/"/) + rexstr(ANY_ESCAPED_CHARS) + rexstr(/"/)
+const ƔAML_DOUBLE_QUOTE = unirex(
+  '"' + rexstr(ANY_QUOTE_CHAR) + '"'
 );
-const YAML_SINGLE_QUOTE = unirex(
-  rexstr(/'/) + rexstr(ANY_ESCAPED_APOS) + rexstr(/'/)
+const ƔAML_SINGLE_QUOTE = unirex(
+  '\'' + rexstr(ANY_ESCAPED_APOS) + '\''
 );
-const YAML_SIMPLE_KEY   = unirex(
+const ƔAML_SIMPLE_KEY   = unirex(
   rexstr(FIRST_KEY_CHAR) + rexstr(LATER_KEY_CHAR) + '*'
 );
-const YAML_SIMPLE_VALUE = unirex(
+const ƔAML_SIMPLE_VALUE = unirex(
   rexstr(FIRST_VALUE_CHAR) + rexstr(LATER_VALUE_CHAR) + '*'
 );
-const YAML_KEY          = unirex(
-  rexstr(YAML_DOUBLE_QUOTE) + '|' +
-  rexstr(YAML_SINGLE_QUOTE) + '|' +
-  rexstr(YAML_SIMPLE_KEY)
+const ƔAML_KEY          = unirex(
+  rexstr(ƔAML_DOUBLE_QUOTE) + '|' +
+  rexstr(ƔAML_SINGLE_QUOTE) + '|' +
+  rexstr(ƔAML_SIMPLE_KEY)
 );
-const YAML_VALUE        = unirex(
-  rexstr(YAML_DOUBLE_QUOTE) + '|' +
-  rexstr(YAML_SINGLE_QUOTE) + '|' +
-  rexstr(YAML_SIMPLE_VALUE)
+const ƔAML_VALUE        = unirex(
+  rexstr(ƔAML_DOUBLE_QUOTE) + '|' +
+  rexstr(ƔAML_SINGLE_QUOTE) + '|' +
+  rexstr(ƔAML_SIMPLE_VALUE)
 );
-const YAML_SEPARATOR    = unirex(
+const ƔAML_SEPARATOR    = unirex(
   rexstr(ANY_WHITE_SPACE) +
   ':' + rexstr(WHITE_SPACE) +
   rexstr(ANY_WHITE_SPACE)
 );
-const YAML_LINE         = unirex(
-  '(' + rexstr(YAML_KEY) + ')' +
-  rexstr(YAML_SEPARATOR) +
-  '(' + rexstr(YAML_VALUE) + ')'
+const ƔAML_LINE         = unirex(
+  '(' + rexstr(ƔAML_KEY) + ')' +
+  rexstr(ƔAML_SEPARATOR) +
+  '(' + rexstr(ƔAML_VALUE) + ')'
 );
 
 /*  FRONTMATTER REGEX  */
 
-const YAML_FRONTMATTER  = unirex(
+const ƔAML_FRONTMATTER  = unirex(
   rexstr(POSSIBLE_STARTS) +
-  rexstr(YAML_LOOKAHEAD) +
-  rexstr(YAML_START) + rexstr(SOME_NEW_LINES) +
+  rexstr(ƔAML_LOOKAHEAD) +
+  rexstr(ƔAML_START) + rexstr(SOME_NEW_LINES) +
   '(?:' +
-    '(' + rexstr(INDENTATION) + ')' +
-    rexstr(YAML_LINE) + rexstr(SOME_NEW_LINES) +
-    '(?:' +
-      '\\1' + rexstr(YAML_LINE) + rexstr(SOME_NEW_LINES) +
-    '){0,4}' +
-  ')?' +
-  rexstr(YAML_END) + rexstr(POSSIBLE_ENDS)
+    rexstr(ANY_WHITE_SPACE) + rexstr(ƔAML_LINE) + rexstr(SOME_NEW_LINES) +
+  '){0,5}' +
+  rexstr(ƔAML_END) + rexstr(POSSIBLE_ENDS)
 );
 
 /*  SEARCHES  */
 
-const FIND_YAML_LINES   = unirex(
-  rexstr(NEW_LINE) + rexstr(INDENTATION) + rexstr(YAML_LINE)
+const FIND_ƔAML_LINE    = unirex(
+  rexstr(NEW_LINE) + rexstr(ANY_WHITE_SPACE) + rexstr(ƔAML_LINE)
 );
 
 /*  STRING PROCESSING  */
 
-function processString(str) {
+function processString (str) {
   switch (str.charAt(0)) {
   case '"':
-    return str
-      .substring(1, str.length - 1)
-      .replace(/\\0/g, '\x00')
-      .replace(/\\a/g, '\x07')
-      .replace(/\\b/g, '\x08')
-      .replace(/\\t/g, '\x09')
-      .replace(/\\\x09/g, '\x09')
-      .replace(/\\n/g, '\x0a')
-      .replace(/\\v/g, '\x0b')
-      .replace(/\\f/g, '\x0c')
-      .replace(/\\r/g, '\x0d')
-      .replace(/\\e/g, '\x1b')
-      .replace(/\\ /g, '\x20')
-      .replace(/\\"/g, '\x22')
-      .replace(/\\\//g, '\x2f')
-      .replace(/\\\\/g, '\x5c')
-      .replace(/\\N/g, '\x85')
-      .replace(/\\_/g, '\xa0')
-      .replace(/\\L/g, '\u2028')
-      .replace(/\\P/g, '\u2029')
-      .replace(
-        new RegExp(
-          unirex(
-            rexstr(/\\x/) + '(' + rexstr(HEXADECIMAL_CHARS) + '{2})'
-          ), 'gu'
-        ), (_, n) => String.fromCodePoint('0x' + n)
-      )
-      .replace(
-        new RegExp(
-          unirex(
-            rexstr(/\\u/) + '(' + rexstr(HEXADECIMAL_CHARS) + '{4})'
-          ), 'gu'
-        ), (_, n) => String.fromCodePoint('0x' + n)
-      )
-      .replace(
-        new RegExp(
-          unirex(
-            rexstr(/\\U/) + '(' + rexstr(HEXADECIMAL_CHARS) + '{8})'
-          ), 'gu'
-        ), (_, n) => String.fromCodePoint('0x' + n)
-      );
+    return str.substring(1, str.length - 1);
   case '\'':
     return str
       .substring(1, str.length - 1)
@@ -321,15 +269,18 @@ export function processBio(content) {
     text: content,
     metadata: [],
   };
-  let yaml = content.match(YAML_FRONTMATTER);
-  if (!yaml) return result;
-  else yaml = yaml[0];
-  let start = content.search(YAML_START);
-  let end = start + yaml.length - yaml.search(YAML_START);
-  result.text = content.substr(0, start) + content.substr(end);
+  let ɣaml = content.match(ƔAML_FRONTMATTER);
+  if (!ɣaml) {
+    return result;
+  } else {
+    ɣaml = ɣaml[0];
+  }
+  const start = content.search(ƔAML_START);
+  const end = start + ɣaml.length - ɣaml.search(ƔAML_START);
+  result.text = content.substr(end);
   let metadata = null;
-  let query = new RegExp(FIND_YAML_LINES, 'g');
-  while ((metadata = query.exec(yaml))) {
+  let query = new RegExp(rexstr(FIND_ƔAML_LINE), 'g');  //  Some browsers don't allow flags unless both args are strings
+  while ((metadata = query.exec(ɣaml))) {
     result.metadata.push([
       processString(metadata[1]),
       processString(metadata[2]),
@@ -352,63 +303,23 @@ export function createBio(note, data) {
         let val = '' + data[i][1];
 
         //  Key processing
-        if (key === (key.match(YAML_SIMPLE_KEY) || [])[0]) /*  do nothing  */;
-        else if (key.indexOf('\'') === -1 && key === (key.match(ANY_ESCAPED_APOS) || [])[0]) key = '\'' + key + '\'';
+        if (key === (key.match(ƔAML_SIMPLE_KEY) || [])[0]) /*  do nothing  */;
+        else if (key === (key.match(ANY_QUOTE_CHAR) || [])[0]) key = '"' + key + '"';
         else {
           key = key
-            .replace(/\x00/g, '\\0')
-            .replace(/\x07/g, '\\a')
-            .replace(/\x08/g, '\\b')
-            .replace(/\x0a/g, '\\n')
-            .replace(/\x0b/g, '\\v')
-            .replace(/\x0c/g, '\\f')
-            .replace(/\x0d/g, '\\r')
-            .replace(/\x1b/g, '\\e')
-            .replace(/\x22/g, '\\"')
-            .replace(/\x5c/g, '\\\\');
-          let badchars = key.match(
-            new RegExp(rexstr(NOT_ALLOWED_CHAR), 'gu')
-          ) || [];
-          for (let j = 0; j < badchars.length; j++) {
-            key = key.replace(
-              badchars[i],
-              '\\u' + badchars[i].codePointAt(0).toLocaleString('en', {
-                useGrouping: false,
-                minimumIntegerDigits: 4,
-              })
-            );
-          }
-          key = '"' + key + '"';
+            .replace(/'/g, '\'\'')
+            .replace(new RegExp(rexstr(NOT_ALLOWED_CHAR), compat_mode ? 'g' : 'gu'), '�');
+          key = '\'' + key + '\'';
         }
 
         //  Value processing
-        if (val === (val.match(YAML_SIMPLE_VALUE) || [])[0]) /*  do nothing  */;
-        else if (val.indexOf('\'') === -1 && val === (val.match(ANY_ESCAPED_APOS) || [])[0]) val = '\'' + val + '\'';
+        if (val === (val.match(ƔAML_SIMPLE_VALUE) || [])[0]) /*  do nothing  */;
+        else if (val === (val.match(ANY_QUOTE_CHAR) || [])[0]) val = '"' + val + '"';
         else {
-          val = val
-            .replace(/\x00/g, '\\0')
-            .replace(/\x07/g, '\\a')
-            .replace(/\x08/g, '\\b')
-            .replace(/\x0a/g, '\\n')
-            .replace(/\x0b/g, '\\v')
-            .replace(/\x0c/g, '\\f')
-            .replace(/\x0d/g, '\\r')
-            .replace(/\x1b/g, '\\e')
-            .replace(/\x22/g, '\\"')
-            .replace(/\x5c/g, '\\\\');
-          let badchars = val.match(
-            new RegExp(rexstr(NOT_ALLOWED_CHAR), 'gu')
-          ) || [];
-          for (let j = 0; j < badchars.length; j++) {
-            val = val.replace(
-              badchars[i],
-              '\\u' + badchars[i].codePointAt(0).toLocaleString('en', {
-                useGrouping: false,
-                minimumIntegerDigits: 4,
-              })
-            );
-          }
-          val = '"' + val + '"';
+          key = key
+            .replace(/'/g, '\'\'')
+            .replace(new RegExp(rexstr(NOT_ALLOWED_CHAR), compat_mode ? 'g' : 'gu'), '�');
+          key = '\'' + key + '\'';
         }
 
         frontmatter += key + ': ' + val + '\n';