about summary refs log tree commit diff
diff options
context:
space:
mode:
author Nolan Lawson <nolan@nolanlawson.com> 2017-07-03 02:02:36 -0700
committer Eugen Rochko <eugen@zeonfederated.com> 2017-07-03 11:02:36 +0200
commit e28258010182b56f27cfbd3f9f9a58fd9cd8870d (patch)
tree 73cc02af9c609f1b542075fc11291bac3963a227
parent 331f0953e9c1855b8195af756e83fee98600d1b8 (diff)
Faster emojify() by avoiding str.replace() entirely (#4049)
-rw-r--r-- app/javascript/mastodon/emoji.js 69
-rw-r--r-- package.json 1
-rw-r--r-- spec/javascript/components/emojify.test.js 34
-rw-r--r-- yarn.lock 4
4 files changed, 71 insertions, 37 deletions
diff --git a/app/javascript/mastodon/emoji.js b/app/javascript/mastodon/emoji.js
index d0df71ea3..7043d5f3a 100644
--- a/app/javascript/mastodon/emoji.js
+++ b/app/javascript/mastodon/emoji.js
@@ -1,60 +1,55 @@
 import emojione from 'emojione';
+import Trie from 'substring-trie';
 
-const toImage = str => shortnameToImage(unicodeToImage(str));
+const mappedUnicode = emojione.mapUnicodeToShort();
+const trie = new Trie(Object.keys(emojione.jsEscapeMap));
 
-const unicodeToImage = str => {
-  const mappedUnicode = emojione.mapUnicodeToShort();
-
-  return str.replace(emojione.regUnicode, unicodeChar => {
-    if (typeof unicodeChar === 'undefined' || unicodeChar === '' || !(unicodeChar in emojione.jsEscapeMap)) {
-      return unicodeChar;
-    }
-
-    const unicode  = emojione.jsEscapeMap[unicodeChar];
-    const short    = mappedUnicode[unicode];
-    const filename = emojione.emojioneList[short].fname;
-    const alt      = emojione.convert(unicode.toUpperCase());
-
-    return `<img draggable="false" class="emojione" alt="${alt}" title="${short}" src="/emoji/${filename}.svg" />`;
-  });
-};
-
-const shortnameToImage = str => {
-  // This walks through the string from end to start, ignoring any tags (<p>, <br>, etc.)
-  // and replacing valid shortnames like :smile: and :wink: that _aren't_ within
-  // tags with an <img> version.
-  // The goal is to be the same as an emojione.regShortNames replacement, but faster.
-  // The reason we go backwards is because then we can replace substrings as we go.
-  let i = str.length;
+function emojify(str) {
+  // This walks through the string from start to end, ignoring any tags (<p>, <br>, etc.)
+  // and replacing valid shortnames like :smile: and :wink: as well as unicode strings
+  // that _aren't_ within tags with an <img> version.
+  // The goal is to be the same as an emojione.regShortNames/regUnicode replacement, but faster.
+  let i = -1;
   let insideTag = false;
   let insideShortname = false;
-  let shortnameEndIndex = -1;
-  while (i--) {
+  let shortnameStartIndex = -1;
+  let match;
+  while (++i < str.length) {
     const char = str.charAt(i);
     if (insideShortname && char === ':') {
-      const shortname = str.substring(i, shortnameEndIndex + 1);
+      const shortname = str.substring(shortnameStartIndex, i + 1);
       if (shortname in emojione.emojioneList) {
         const unicode = emojione.emojioneList[shortname].unicode[emojione.emojioneList[shortname].unicode.length - 1];
         const alt = emojione.convert(unicode.toUpperCase());
         const replacement = `<img draggable="false" class="emojione" alt="${alt}" title="${shortname}" src="/emoji/${unicode}.svg" />`;
-        str = str.substring(0, i) + replacement + str.substring(shortnameEndIndex + 1);
+        str = str.substring(0, shortnameStartIndex) + replacement + str.substring(i + 1);
+        i += (replacement.length - shortname.length - 1); // jump ahead the length we've added to the string
       } else {
-        i++; // stray colon, try again
+        i--; // stray colon, try again
       }
       insideShortname = false;
-    } else if (insideTag && char === '<') {
+    } else if (insideTag && char === '>') {
       insideTag = false;
-    } else if (char === '>') {
+    } else if (char === '<') {
       insideTag = true;
       insideShortname = false;
     } else if (!insideTag && char === ':') {
       insideShortname = true;
-      shortnameEndIndex = i;
+      shortnameStartIndex = i;
+    } else if (!insideTag && (match = trie.search(str.substring(i)))) {
+      const unicodeStr = match;
+      if (unicodeStr in emojione.jsEscapeMap) {
+        const unicode  = emojione.jsEscapeMap[unicodeStr];
+        const short    = mappedUnicode[unicode];
+        const filename = emojione.emojioneList[short].fname;
+        const alt      = emojione.convert(unicode.toUpperCase());
+        const replacement =  `<img draggable="false" class="emojione" alt="${alt}" title="${short}" src="/emoji/${filename}.svg" />`;
+        str = str.substring(0, i) + replacement + str.substring(i + unicodeStr.length);
+        i += (replacement.length - unicodeStr.length); // jump ahead the length we've added to the string
+      }
     }
   }
   return str;
-};
+}
 
-export default function emojify(text) {
-  return toImage(text);
-};
+export default emojify;
diff --git a/package.json b/package.json
index 7fa80a0c2..d5c05dae3 100644
--- a/package.json
+++ b/package.json
@@ -102,6 +102,7 @@
     "sass-loader": "^6.0.6",
     "stringz": "^0.2.2",
     "style-loader": "^0.18.2",
+    "substring-trie": "^1.0.0",
     "throng": "^4.0.0",
     "tiny-queue": "^0.2.1",
     "uuid": "^3.1.0",
diff --git a/spec/javascript/components/emojify.test.js b/spec/javascript/components/emojify.test.js
index 7a496623e..3e8b25af9 100644
--- a/spec/javascript/components/emojify.test.js
+++ b/spec/javascript/components/emojify.test.js
@@ -46,4 +46,38 @@ describe('emojify', () => {
     expect(emojify(':smile')).to.equal(':smile');
   });
 
+  it('does two emoji next to each other', () => {
+    expect(emojify(':smile::wink:')).to.equal(
+      '<img draggable="false" class="emojione" alt="😄" title=":smile:" src="/emoji/1f604.svg" /><img draggable="false" class="emojione" alt="😉" title=":wink:" src="/emoji/1f609.svg" />');
+  });
+
+  it('does unicode', () => {
+    expect(emojify('\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC66\u200D\uD83D\uDC66')).to.equal(
+      '<img draggable="false" class="emojione" alt="👩‍👩‍👦‍👦" title=":family_wwbb:" src="/emoji/1f469-1f469-1f466-1f466.svg" />');
+    expect(emojify('\uD83D\uDC68\uD83D\uDC69\uD83D\uDC67\uD83D\uDC67')).to.equal(
+      '<img draggable="false" class="emojione" alt="👨👩👧👧" title=":family_mwgg:" src="/emoji/1f468-1f469-1f467-1f467.svg" />');
+    expect(emojify('\uD83D\uDC69\uD83D\uDC69\uD83D\uDC66')).to.equal('<img draggable="false" class="emojione" alt="👩👩👦" title=":family_wwb:" src="/emoji/1f469-1f469-1f466.svg" />');
+    expect(emojify('\u2757')).to.equal(
+      '<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" />');
+  });
+
+  it('does multiple unicode', () => {
+    expect(emojify('\u2757 #\uFE0F\u20E3')).to.equal(
+      '<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /> <img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" />');
+    expect(emojify('\u2757#\uFE0F\u20E3')).to.equal(
+      '<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /><img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" />');
+    expect(emojify('\u2757 #\uFE0F\u20E3 \u2757')).to.equal(
+      '<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /> <img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" /> <img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" />');
+    expect(emojify('foo \u2757 #\uFE0F\u20E3 bar')).to.equal(
+      'foo <img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /> <img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" /> bar');
+  });
+
+  it('does mixed unicode and shortnames', () => {
+    expect(emojify(':smile:#\uFE0F\u20E3:wink:\u2757')).to.equal('<img draggable="false" class="emojione" alt="😄" title=":smile:" src="/emoji/1f604.svg" /><img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" /><img draggable="false" class="emojione" alt="😉" title=":wink:" src="/emoji/1f609.svg" /><img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" />');
+  });
+
+  it('ignores unicode inside of tags', () => {
+    expect(emojify('<p data-foo="\uD83D\uDC69\uD83D\uDC69\uD83D\uDC66"></p>')).to.equal('<p data-foo="\uD83D\uDC69\uD83D\uDC69\uD83D\uDC66"></p>');
+  });
+
 });
diff --git a/yarn.lock b/yarn.lock
index adabca08d..609f256c9 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -6819,6 +6819,10 @@ style-loader@^0.18.2:
     loader-utils "^1.0.2"
     schema-utils "^0.3.0"
 
+substring-trie@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/substring-trie/-/substring-trie-1.0.0.tgz#5a7ecb83aefcca7b3720f7897cf69e97023be143"
+
 sugarss@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/sugarss/-/sugarss-1.0.0.tgz#65e51b3958432fb70d5451a68bb33e32d0cf1ef7"