about summary refs log tree commit diff
path: root/app
diff options
context:
space:
mode:
author nullkal <nullkal@users.noreply.github.com> 2017-07-09 05:44:31 +0900
committer Eugen Rochko <eugen@zeonfederated.com> 2017-07-08 22:44:31 +0200
commit 007ab330e6ffb1e07995d4e306473d457043e2eb (patch)
tree baf206a3c7ae9f626a679fb3c7b650e694d8d480 /app
parent 794781d1219112482e4abbc0a98683a17d170e2b (diff)
Use charlock_holmes instead of nkf at FetchLinkCardService (#4080)
* Specs for language detection

* Use CharlockHolmes instead of NKF

* Correct mistakes

* Correct style

* Set hint_enc instead of falling back and strip_tags

* Improve specs

* Add dependencies
Diffstat (limited to 'app')
-rw-r--r-- app/services/fetch_link_card_service.rb 8
1 files changed, 6 insertions, 2 deletions
diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb
index 8ddaa2bf4..6ef3abb66 100644
--- a/app/services/fetch_link_card_service.rb
+++ b/app/services/fetch_link_card_service.rb
@@ -1,5 +1,4 @@
 # frozen_string_literal: true
-require 'nkf'
 
 class FetchLinkCardService < BaseService
   include HttpHelper
@@ -86,7 +85,12 @@ class FetchLinkCardService < BaseService
     return if response.code != 200 || response.mime_type != 'text/html'
 
     html = response.to_s
-    page = Nokogiri::HTML(html, nil, NKF.guess(html).to_s)
+
+    detector = CharlockHolmes::EncodingDetector.new
+    detector.strip_tags = true
+
+    guess = detector.detect(html, response.charset)
+    page = Nokogiri::HTML(html, nil, guess&.fetch(:encoding))
 
     card.type             = :link
     card.title            = meta_property(page, 'og:title') || page.at_xpath('//title')&.content