about summary refs log tree commit diff
path: root/app/lib/hashtag_normalizer.rb
diff options
context:
space:
mode:
author Claire <claire.github-309c@sitedethib.com> 2022-07-17 23:10:31 +0200
committer GitHub <noreply@github.com> 2022-07-17 23:10:31 +0200
commit ab1488a6ad93f572e1d184cb9653f76fd408474f (patch)
tree f1dd126f4745eb99243f390169fb957a72f5acc6 /app/lib/hashtag_normalizer.rb
parent 325ebb76b19bd20e1b76d8bc26c11cab02f6571c (diff)
parent 6c0d73a675d62f676b005c06593fd69e9a7bc0e5 (diff)
Merge pull request #1804 from ClearlyClaire/glitch-soc/merge-upstream
Merge upstream changes
Diffstat (limited to 'app/lib/hashtag_normalizer.rb')
-rw-r--r-- app/lib/hashtag_normalizer.rb 25
1 file changed, 25 insertions, 0 deletions
diff --git a/app/lib/hashtag_normalizer.rb b/app/lib/hashtag_normalizer.rb
new file mode 100644
index 000000000..c1f99e163
--- /dev/null
+++ b/app/lib/hashtag_normalizer.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+class HashtagNormalizer
+  def normalize(str)
+    remove_invalid_characters(ascii_folding(lowercase(cjk_width(str))))
+  end
+
+  private
+
+  def remove_invalid_characters(str)
+    str.gsub(/[^[:alnum:]#{Tag::HASHTAG_SEPARATORS}]/, '')
+  end
+
+  def ascii_folding(str)
+    ASCIIFolding.new.fold(str)
+  end
+
+  def lowercase(str)
+    str.mb_chars.downcase.to_s
+  end
+
+  def cjk_width(str)
+    str.unicode_normalize(:nfkc)
+  end
+end