diff options
author | Eugen Rochko <eugen@zeonfederated.com> | 2022-07-13 15:03:28 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-13 15:03:28 +0200 |
commit | e7aa2be828f6a632dadd5c41e2364cea91ddbb2c (patch) | |
tree | f18390c05c4aa6ce5b15572b470db4bd4791129b /streaming | |
parent | 12ed2d793b1b4823b0df047a47677bb0667bf43d (diff) |
Change how hashtags are normalized (#18795)
* Change how hashtags are normalized
* Fix tests
Diffstat (limited to 'streaming')
-rw-r--r-- | streaming/index.js | 32 |
1 files changed, 30 insertions, 2 deletions
```diff
diff --git a/streaming/index.js b/streaming/index.js
index 792ec5a44..a55181bad 100644
--- a/streaming/index.js
+++ b/streaming/index.js
@@ -893,6 +893,34 @@ const startWorker = async (workerId) => {
   };
 
   /**
+   * See app/lib/ascii_folder.rb for the canon definitions
+   * of these constants
+   */
+  const NON_ASCII_CHARS = 'ÀÁÂÃÄÅàáâãäåĀāĂăĄąÇçĆćĈĉĊċČčÐðĎďĐđÈÉÊËèéêëĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħÌÍÎÏìíîïĨĩĪīĬĭĮįİıĴĵĶķĸĹĺĻļĽľĿŀŁłÑñŃńŅņŇňŉŊŋÒÓÔÕÖØòóôõöøŌōŎŏŐőŔŕŖŗŘřŚśŜŝŞşŠšſŢţŤťŦŧÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųŴŵÝýÿŶŷŸŹźŻżŽž';
+  const EQUIVALENT_ASCII_CHARS = 'AAAAAAaaaaaaAaAaAaCcCcCcCcCcDdDdDdEEEEeeeeEeEeEeEeEeGgGgGgGgHhHhIIIIiiiiIiIiIiIiIiJjKkkLlLlLlLlLlNnNnNnNnnNnOOOOOOooooooOoOoOoRrRrRrSsSsSsSssTtTtTtUUUUuuuuUuUuUuUuUuUuWwYyyYyYZzZzZz';
+
+  /**
+   * @param {string} str
+   * @return {string}
+   */
+  const foldToASCII = str => {
+    const regex = new RegExp(NON_ASCII_CHARS.split('').join('|'), 'g');
+
+    return str.replace(regex, match => {
+      const index = NON_ASCII_CHARS.indexOf(match);
+      return EQUIVALENT_ASCII_CHARS[index];
+    });
+  };
+
+  /**
+   * @param {string} str
+   * @return {string}
+   */
+  const normalizeHashtag = str => {
+    return foldToASCII(str.normalize('NFKC').toLowerCase()).replace(/[^\p{L}\p{N}_\u00b7\u200c]/gu, '');
+  };
+
+  /**
    * @param {any} req
    * @param {string} name
    * @param {StreamParams} params
@@ -968,7 +996,7 @@ const startWorker = async (workerId) => {
         reject('No tag for stream provided');
       } else {
         resolve({
-          channelIds: [`timeline:hashtag:${params.tag.toLowerCase()}`],
+          channelIds: [`timeline:hashtag:${normalizeHashtag(params.tag)}`],
           options: { needsFiltering: true },
         });
       }
@@ -979,7 +1007,7 @@ const startWorker = async (workerId) => {
         reject('No tag for stream provided');
       } else {
         resolve({
-          channelIds: [`timeline:hashtag:${params.tag.toLowerCase()}:local`],
+          channelIds: [`timeline:hashtag:${normalizeHashtag(params.tag)}:local`],
           options: { needsFiltering: true },
         });
       }
```