about summary refs log tree commit diff
path: root/streaming
diff options
context:
space:
mode:
author Claire <claire.github-309c@sitedethib.com> 2022-07-17 22:07:20 +0200
committer Claire <claire.github-309c@sitedethib.com> 2022-07-17 22:07:20 +0200
commit cd87d7dcef814ad86fb15334680cb0e3232437a9 (patch)
tree 63db8838568ea440bb3cb9797cdbaf5c4952e9e7 /streaming
parent 9094c2f52c24e1c00b594e7c11cd00e4a07eb431 (diff)
parent c3f0621a59a74d0e20e6db6170894871c48e8f0f (diff)
Merge branch 'main' into glitch-soc/merge-upstream
- `.env.production.sample`:
  Our sample config file is very different from upstream since it is much more
  complete. Upstream added documentation for a few env variables.
  Copied the new variables/documentation from upstream.
- `app/lib/feed_manager.rb`:
  Upstream added a timeline type (hashtags), while glitch-soc already had an
  extra one (direct messages). Not really a conflict but textually close
  changes.
  Ported upstream's changes.
- `app/models/custom_emoji.rb`:
  Upstream upped the custom emoji size limit, while glitch-soc had configurable
  limits.
  Upped the default limits accordingly.
- `streaming/index.js`:
  Upstream reworked how hashtags were normalized. Minor conflict due to
  glitch-soc's handling of instance-local posts.
  Ported upstream's changes.
Diffstat (limited to 'streaming')
-rw-r--r-- streaming/index.js 32
1 files changed, 30 insertions, 2 deletions
diff --git a/streaming/index.js b/streaming/index.js
index 183cdf789..ff7d48250 100644
--- a/streaming/index.js
+++ b/streaming/index.js
@@ -901,6 +901,34 @@ const startWorker = async (workerId) => {
   };
 
   /**
+   * See app/lib/ascii_folder.rb for the canon definitions
+   * of these constants
+   */
+  const NON_ASCII_CHARS        = 'ÀÁÂÃÄÅàáâãäåĀāĂăĄąÇçĆćĈĉĊċČčÐðĎďĐđÈÉÊËèéêëĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħÌÍÎÏìíîïĨĩĪīĬĭĮįİıĴĵĶķĸĹĺĻļĽľĿŀŁłÑñŃńŅņŇňŉŊŋÒÓÔÕÖØòóôõöøŌōŎŏŐőŔŕŖŗŘřŚśŜŝŞşŠšſŢţŤťŦŧÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųŴŵÝýÿŶŷŸŹźŻżŽž';
+  const EQUIVALENT_ASCII_CHARS = 'AAAAAAaaaaaaAaAaAaCcCcCcCcCcDdDdDdEEEEeeeeEeEeEeEeEeGgGgGgGgHhHhIIIIiiiiIiIiIiIiIiJjKkkLlLlLlLlLlNnNnNnNnnNnOOOOOOooooooOoOoOoRrRrRrSsSsSsSssTtTtTtUUUUuuuuUuUuUuUuUuUuWwYyyYyYZzZzZz';
+
+  /**
+   * @param {string} str
+   * @return {string}
+   */
+  const foldToASCII = str => {
+    const regex = new RegExp(NON_ASCII_CHARS.split('').join('|'), 'g');
+
+    return str.replace(regex, match => {
+      const index = NON_ASCII_CHARS.indexOf(match);
+      return EQUIVALENT_ASCII_CHARS[index];
+    });
+  };
+
+  /**
+   * @param {string} str
+   * @return {string}
+   */
+  const normalizeHashtag = str => {
+    return foldToASCII(str.normalize('NFKC').toLowerCase()).replace(/[^\p{L}\p{N}_\u00b7\u200c]/gu, '');
+  };
+
+  /**
    * @param {any} req
    * @param {string} name
    * @param {StreamParams} params
@@ -990,7 +1018,7 @@ const startWorker = async (workerId) => {
         reject('No tag for stream provided');
       } else {
         resolve({
-          channelIds: [`timeline:hashtag:${params.tag.toLowerCase()}`],
+          channelIds: [`timeline:hashtag:${normalizeHashtag(params.tag)}`],
           options: { needsFiltering: true, allowLocalOnly: true },
         });
       }
@@ -1001,7 +1029,7 @@ const startWorker = async (workerId) => {
         reject('No tag for stream provided');
       } else {
         resolve({
-          channelIds: [`timeline:hashtag:${params.tag.toLowerCase()}:local`],
+          channelIds: [`timeline:hashtag:${normalizeHashtag(params.tag)}:local`],
           options: { needsFiltering: true, allowLocalOnly: true },
         });
       }