diff options
author | Claire <claire.github-309c@sitedethib.com> | 2022-07-17 23:10:31 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-17 23:10:31 +0200 |
commit | ab1488a6ad93f572e1d184cb9653f76fd408474f (patch) | |
tree | f1dd126f4745eb99243f390169fb957a72f5acc6 /app/lib/hashtag_normalizer.rb | |
parent | 325ebb76b19bd20e1b76d8bc26c11cab02f6571c (diff) | |
parent | 6c0d73a675d62f676b005c06593fd69e9a7bc0e5 (diff) |
Merge pull request #1804 from ClearlyClaire/glitch-soc/merge-upstream
Merge upstream changes
Diffstat (limited to 'app/lib/hashtag_normalizer.rb')
-rw-r--r-- | app/lib/hashtag_normalizer.rb | 25 |
1 files changed, 25 insertions, 0 deletions
diff --git a/app/lib/hashtag_normalizer.rb b/app/lib/hashtag_normalizer.rb new file mode 100644 index 000000000..c1f99e163 --- /dev/null +++ b/app/lib/hashtag_normalizer.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +class HashtagNormalizer + def normalize(str) + remove_invalid_characters(ascii_folding(lowercase(cjk_width(str)))) + end + + private + + def remove_invalid_characters(str) + str.gsub(/[^[:alnum:]#{Tag::HASHTAG_SEPARATORS}]/, '') + end + + def ascii_folding(str) + ASCIIFolding.new.fold(str) + end + + def lowercase(str) + str.mb_chars.downcase.to_s + end + + def cjk_width(str) + str.unicode_normalize(:nfkc) + end +end |