From 6107e954040443d1fc4344b440b229d70fe75166 Mon Sep 17 00:00:00 2001 From: kibigo! Date: Wed, 28 Jun 2017 00:27:44 -0700 Subject: Backend YAML Processing + Profile Metadata on Static Pages --- app/lib/frontmatter_handler.rb | 242 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 app/lib/frontmatter_handler.rb (limited to 'app/lib/frontmatter_handler.rb') diff --git a/app/lib/frontmatter_handler.rb b/app/lib/frontmatter_handler.rb new file mode 100644 index 000000000..19007cf4e --- /dev/null +++ b/app/lib/frontmatter_handler.rb @@ -0,0 +1,242 @@ +# frozen_string_literal: true + +require 'singleton' + +# See also `app/javascript/features/account/util/bio_metadata.js`. + +class FrontmatterHandler + include Singleton + + # CONVENIENCE FUNCTIONS # + + def self.unirex(str) + Regexp.new str, false, 'um' + end + def self.rexstr(exp) + '(?:' + exp.source + ')' + end + + # CHARACTER CLASSES # + + DOCUMENT_START = /^/ + DOCUMENT_END = /$/ + ALLOWED_CHAR = # c-printable` in the YAML 1.2 spec. + /[\t\n\r\u{20}-\u{7e}\u{85}\u{a0}-\u{d7ff}\u{e000}-\u{fffd}\u{10000}-\u{10ffff}]/u + WHITE_SPACE = /[ \t]/ + INDENTATION = / */ + LINE_BREAK = /\r?\n|\r|/ + ESCAPE_CHAR = /[0abt\tnvfre "\/\\N_LP]/ + HEXADECIMAL_CHARS = /[0-9a-fA-F]/ + INDICATOR = /[-?:,\[\]{}&#*!|>'"%@`]/ + FLOW_CHAR = /[,\[\]{}]/ + + # NEGATED CHARACTER CLASSES # + + NOT_WHITE_SPACE = unirex '(?!' + rexstr(WHITE_SPACE) + ').' + NOT_LINE_BREAK = unirex '(?!' + rexstr(LINE_BREAK) + ').' + NOT_INDICATOR = unirex '(?!' + rexstr(INDICATOR) + ').' + NOT_FLOW_CHAR = unirex '(?!' + rexstr(FLOW_CHAR) + ').' + + # BASIC CONSTRUCTS # + + ANY_WHITE_SPACE = unirex rexstr(WHITE_SPACE) + '*' + ANY_ALLOWED_CHARS = unirex rexstr(ALLOWED_CHAR) + '*' + NEW_LINE = unirex( + rexstr(ANY_WHITE_SPACE) + rexstr(LINE_BREAK) + ) + SOME_NEW_LINES = unirex( + '(?:' + rexstr(ANY_WHITE_SPACE) + rexstr(LINE_BREAK) + ')+' + ) + POSSIBLE_STARTS = unirex( + rexstr(DOCUMENT_START) + rexstr(/]*>/) + '?' + ) + POSSIBLE_ENDS = unirex( + rexstr(SOME_NEW_LINES) + '|' + + rexstr(DOCUMENT_END) + '|' + + rexstr(/<\/p>/) + ) + CHARACTER_ESCAPE = unirex( + rexstr(/\\/) + + '(?:' + + rexstr(ESCAPE_CHAR) + '|' + + rexstr(/x/) + rexstr(HEXADECIMAL_CHARS) + '{2}' + '|' + + rexstr(/u/) + rexstr(HEXADECIMAL_CHARS) + '{4}' + '|' + + rexstr(/U/) + rexstr(HEXADECIMAL_CHARS) + '{8}' + + ')' + ) + ESCAPED_CHAR = unirex( + rexstr(/(?!["\\])/) + rexstr(NOT_LINE_BREAK) + '|' + + rexstr(CHARACTER_ESCAPE) + ) + ANY_ESCAPED_CHARS = unirex( + rexstr(ESCAPED_CHAR) + '*' + ) + ESCAPED_APOS = unirex( + '(?=' + rexstr(NOT_LINE_BREAK) + ')' + rexstr(/[^']|''/) + ) + ANY_ESCAPED_APOS = unirex( + rexstr(ESCAPED_APOS) + '*' + ) + FIRST_KEY_CHAR = unirex( + '(?=' + rexstr(NOT_LINE_BREAK) + ')' + + '(?=' + rexstr(NOT_WHITE_SPACE) + ')' + + rexstr(NOT_INDICATOR) + '|' + + rexstr(/[?:-]/) + + '(?=' + rexstr(NOT_LINE_BREAK) + ')' + + '(?=' + rexstr(NOT_WHITE_SPACE) + ')' + + '(?=' + rexstr(NOT_FLOW_CHAR) + ')' + ) + FIRST_VALUE_CHAR = unirex( + '(?=' + rexstr(NOT_LINE_BREAK) + ')' + + '(?=' + rexstr(NOT_WHITE_SPACE) + ')' + + rexstr(NOT_INDICATOR) + '|' + + rexstr(/[?:-]/) + + '(?=' + rexstr(NOT_LINE_BREAK) + ')' + + '(?=' + rexstr(NOT_WHITE_SPACE) + ')' + # Flow indicators are allowed in values. + ) + LATER_KEY_CHAR = unirex( + rexstr(WHITE_SPACE) + '|' + + '(?=' + rexstr(NOT_LINE_BREAK) + ')' + + '(?=' + rexstr(NOT_WHITE_SPACE) + ')' + + '(?=' + rexstr(NOT_FLOW_CHAR) + ')' + + rexstr(/[^:#]#?/) + '|' + + rexstr(/:/) + '(?=' + rexstr(NOT_WHITE_SPACE) + ')' + ) + LATER_VALUE_CHAR = unirex( + rexstr(WHITE_SPACE) + '|' + + '(?=' + rexstr(NOT_LINE_BREAK) + ')' + + '(?=' + rexstr(NOT_WHITE_SPACE) + ')' + + # Flow indicators are allowed in values. + rexstr(/[^:#]#?/) + '|' + + rexstr(/:/) + '(?=' + rexstr(NOT_WHITE_SPACE) + ')' + ) + + # YAML CONSTRUCTS # + + YAML_START = unirex( + rexstr(ANY_WHITE_SPACE) + rexstr(/---/) + ) + YAML_END = unirex( + rexstr(ANY_WHITE_SPACE) + rexstr(/(?:---|\.\.\.)/) + ) + YAML_LOOKAHEAD = unirex( + '(?=' + + rexstr(YAML_START) + + rexstr(ANY_ALLOWED_CHARS) + rexstr(NEW_LINE) + + rexstr(YAML_END) + rexstr(POSSIBLE_ENDS) + + ')' + ) + YAML_DOUBLE_QUOTE = unirex( + rexstr(/"/) + rexstr(ANY_ESCAPED_CHARS) + rexstr(/"/) + ) + YAML_SINGLE_QUOTE = unirex( + rexstr(/'/) + rexstr(ANY_ESCAPED_APOS) + rexstr(/'/) + ) + YAML_SIMPLE_KEY = unirex( + rexstr(FIRST_KEY_CHAR) + rexstr(LATER_KEY_CHAR) + '*' + ) + YAML_SIMPLE_VALUE = unirex( + rexstr(FIRST_VALUE_CHAR) + rexstr(LATER_VALUE_CHAR) + '*' + ) + YAML_KEY = unirex( + rexstr(YAML_DOUBLE_QUOTE) + '|' + + rexstr(YAML_SINGLE_QUOTE) + '|' + + rexstr(YAML_SIMPLE_KEY) + ) + YAML_VALUE = unirex( + rexstr(YAML_DOUBLE_QUOTE) + '|' + + rexstr(YAML_SINGLE_QUOTE) + '|' + + rexstr(YAML_SIMPLE_VALUE) + ) + YAML_SEPARATOR = unirex( + rexstr(ANY_WHITE_SPACE) + + ':' + rexstr(WHITE_SPACE) + + rexstr(ANY_WHITE_SPACE) + ) + YAML_LINE = unirex( + '(' + rexstr(YAML_KEY) + ')' + + rexstr(YAML_SEPARATOR) + + '(' + rexstr(YAML_VALUE) + ')' + ) + + # FRONTMATTER REGEX # + + YAML_FRONTMATTER = unirex( + rexstr(POSSIBLE_STARTS) + + rexstr(YAML_LOOKAHEAD) + + rexstr(YAML_START) + rexstr(SOME_NEW_LINES) + + '(?:' + + '(' + rexstr(INDENTATION) + ')' + + rexstr(YAML_LINE) + rexstr(SOME_NEW_LINES) + + '(?:' + + '\\1' + rexstr(YAML_LINE) + rexstr(SOME_NEW_LINES) + + '){0,4}' + + ')?' + + rexstr(YAML_END) + rexstr(POSSIBLE_ENDS) + ) + + # SEARCHES # + + FIND_YAML_LINES = unirex( + rexstr(NEW_LINE) + rexstr(INDENTATION) + rexstr(YAML_LINE) + ) + + # STRING PROCESSING # + + def process_string(str) + case str[0] + when '"' + str[1..-2] + .gsub(/\\0/, "\u{00}") + .gsub(/\\a/, "\u{07}") + .gsub(/\\b/, "\u{08}") + .gsub(/\\t/, "\u{09}") + .gsub(/\\n/, "\u{0a}") + .gsub(/\\v/, "\u{0b}") + .gsub(/\\f/, "\u{0c}") + .gsub(/\\r/, "\u{0d}") + .gsub(/\\e/, "\u{1b}") + .gsub(/\\ /, "\u{20}") + .gsub(/\\"/, "\u{22}") + .gsub(/\\\//, "\u{2f}") + .gsub(/\\\\/, "\u{5c}") + .gsub(/\\N/, "\u{85}") + .gsub(/\\_/, "\u{a0}") + .gsub(/\\L/, "\u{2028}") + .gsub(/\\P/, "\u{2029}") + .gsub(/\\x([0-9a-fA-F]{2})/mu) {|s| $1.to_i.chr Encoding::UTF_8} + .gsub(/\\u([0-9a-fA-F]{4})/mu) {|s| $1.to_i.chr Encoding::UTF_8} + .gsub(/\\U([0-9a-fA-F]{8})/mu) {|s| $1.to_i.chr Encoding::UTF_8} + when "'" + str[1..-2].gsub(/''/, "'") + else + str + end + end + + # BIO PROCESSING # + + def process_bio content + result = { + text: content.gsub(/"/, '"').gsub(/'/, "'"), + metadata: [] + } + yaml = YAML_FRONTMATTER.match(result[:text]) + return result unless yaml + yaml = yaml[0] + start = YAML_START =~ result[:text] + ending = start + yaml.length - (YAML_START =~ yaml) + result[:text][start..ending - 1] = '' + metadata = nil + index = 0 + while metadata = FIND_YAML_LINES.match(yaml, index) do + index = metadata.end(0) + result[:metadata].push [ + process_string(metadata[1]), process_string(metadata[2]) + ] + end + return result + end + +end -- cgit From 6cbbdc805f68b0318c9b3a7c01f4bdaefee5a2fa Mon Sep 17 00:00:00 2001 From: kibigo! Date: Wed, 28 Jun 2017 15:16:13 -0700 Subject: Fixed Regexp.new syntax --- app/lib/frontmatter_handler.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'app/lib/frontmatter_handler.rb') diff --git a/app/lib/frontmatter_handler.rb b/app/lib/frontmatter_handler.rb index 19007cf4e..4ee43f092 100644 --- a/app/lib/frontmatter_handler.rb +++ b/app/lib/frontmatter_handler.rb @@ -10,7 +10,7 @@ class FrontmatterHandler # CONVENIENCE FUNCTIONS # def self.unirex(str) - Regexp.new str, false, 'um' + Regexp.new str, Regexp::MULTILINE, 'u' end def self.rexstr(exp) '(?:' + exp.source + ')' -- cgit From c58877862d8622b009e473205a6322e013849a7e Mon Sep 17 00:00:00 2001 From: kibigo! Date: Fri, 30 Jun 2017 15:03:31 -0700 Subject: createBio function added --- .../mastodon/features/account/util/bio_metadata.js | 85 ++++++++++++++++++++++ app/lib/frontmatter_handler.rb | 2 + 2 files changed, 87 insertions(+) (limited to 'app/lib/frontmatter_handler.rb') diff --git a/app/javascript/mastodon/features/account/util/bio_metadata.js b/app/javascript/mastodon/features/account/util/bio_metadata.js index fc24549b6..bdbb1750b 100644 --- a/app/javascript/mastodon/features/account/util/bio_metadata.js +++ b/app/javascript/mastodon/features/account/util/bio_metadata.js @@ -69,6 +69,9 @@ const NOT_WHITE_SPACE = unirex('(?!' + rexstr(WHITE_SPACE) + ')[^]'); const NOT_LINE_BREAK = unirex('(?!' + rexstr(LINE_BREAK) + ')[^]'); const NOT_INDICATOR = unirex('(?!' + rexstr(INDICATOR) + ')[^]'); const NOT_FLOW_CHAR = unirex('(?!' + rexstr(FLOW_CHAR) + ')[^]'); +const NOT_ALLOWED_CHAR = unirex( + '(?!' + rexstr(ALLOWED_CHAR) + ')[^]' +); /* BASIC CONSTRUCTS */ @@ -226,6 +229,7 @@ function processString(str) { .replace(/\\a/g, '\x07') .replace(/\\b/g, '\x08') .replace(/\\t/g, '\x09') + .replace(/\\\x09/g, '\x09') .replace(/\\n/g, '\x0a') .replace(/\\v/g, '\x0b') .replace(/\\f/g, '\x0c') @@ -293,3 +297,84 @@ export function processBio(content) { } return result; } + +/* BIO CREATION */ + +export function createBio(note, data) { + if (!note) note = ''; + let frontmatter = ''; + if ((data && data.length) || note.match(/^\s*---\s+/)) { + if (!data) frontmatter = '---\n...\n'; + else { + frontmatter += '---\n'; + for (let i = 0; i < data.length; i++) { + let key = '' + data[i][0]; + let val = '' + data[i][1]; + + // Key processing + if (key === (key.match(YAML_SIMPLE_KEY) || [])[0]) /* do nothing */; + else if (key.indexOf('\'') === -1 && key === (key.match(ANY_ESCAPED_APOS) || [])[0]) key = '\'' + key + '\''; + else { + key = key + .replace(/\x00/g, '\\0') + .replace(/\x07/g, '\\a') + .replace(/\x08/g, '\\b') + .replace(/\x0a/g, '\\n') + .replace(/\x0b/g, '\\v') + .replace(/\x0c/g, '\\f') + .replace(/\x0d/g, '\\r') + .replace(/\x1b/g, '\\e') + .replace(/\x22/g, '\\"') + .replace(/\x5c/g, '\\\\'); + let badchars = key.match( + new RegExp(rexstr(NOT_ALLOWED_CHAR), 'gu') + ) || []; + for (let j = 0; j < badchars.length; j++) { + key = key.replace( + badchars[i], + '\\u' + badchars[i].codePointAt(0).toLocaleString('en', { + useGrouping: false, + minimumIntegerDigits: 4, + }) + ); + } + key = '"' + key + '"'; + } + + // Value processing + if (val === (val.match(YAML_SIMPLE_VALUE) || [])[0]) /* do nothing */; + else if (val.indexOf('\'') === -1 && val === (val.match(ANY_ESCAPED_APOS) || [])[0]) val = '\'' + val + '\''; + else { + val = val + .replace(/\x00/g, '\\0') + .replace(/\x07/g, '\\a') + .replace(/\x08/g, '\\b') + .replace(/\x0a/g, '\\n') + .replace(/\x0b/g, '\\v') + .replace(/\x0c/g, '\\f') + .replace(/\x0d/g, '\\r') + .replace(/\x1b/g, '\\e') + .replace(/\x22/g, '\\"') + .replace(/\x5c/g, '\\\\'); + let badchars = val.match( + new RegExp(rexstr(NOT_ALLOWED_CHAR), 'gu') + ) || []; + for (let j = 0; j < badchars.length; j++) { + val = val.replace( + badchars[i], + '\\u' + badchars[i].codePointAt(0).toLocaleString('en', { + useGrouping: false, + minimumIntegerDigits: 4, + }) + ); + } + val = '"' + val + '"'; + } + + frontmatter += key + ': ' + val + '\n'; + } + frontmatter += '...\n'; + } + } + return frontmatter + note; +} diff --git a/app/lib/frontmatter_handler.rb b/app/lib/frontmatter_handler.rb index 4ee43f092..83e5f465e 100644 --- a/app/lib/frontmatter_handler.rb +++ b/app/lib/frontmatter_handler.rb @@ -36,6 +36,7 @@ class FrontmatterHandler NOT_LINE_BREAK = unirex '(?!' + rexstr(LINE_BREAK) + ').' NOT_INDICATOR = unirex '(?!' + rexstr(INDICATOR) + ').' NOT_FLOW_CHAR = unirex '(?!' + rexstr(FLOW_CHAR) + ').' + NOT_ALLOWED_CHAR = unirex '(?!' + rexstr(ALLOWED_CHAR) + ').' # BASIC CONSTRUCTS # @@ -192,6 +193,7 @@ class FrontmatterHandler .gsub(/\\a/, "\u{07}") .gsub(/\\b/, "\u{08}") .gsub(/\\t/, "\u{09}") + .gsub(/\\\u{09}/, "\u{09}") .gsub(/\\n/, "\u{0a}") .gsub(/\\v/, "\u{0b}") .gsub(/\\f/, "\u{0c}") -- cgit