diff options
author | nullkal <nullkal@users.noreply.github.com> | 2017-07-09 05:44:31 +0900 |
---|---|---|
committer | Eugen Rochko <eugen@zeonfederated.com> | 2017-07-08 22:44:31 +0200 |
commit | 007ab330e6ffb1e07995d4e306473d457043e2eb (patch) | |
tree | baf206a3c7ae9f626a679fb3c7b650e694d8d480 /app | |
parent | 794781d1219112482e4abbc0a98683a17d170e2b (diff) |
Use charlock_holmes instead of nkf at FetchLinkCardService (#4080)
* Specs for language detection * Use CharlockHolmes instead of NKF * Correct mistakes * Correct style * Set hint_enc instead of falling back and strip_tags * Improve specs * Add dependencies
Diffstat (limited to 'app')
-rw-r--r-- | app/services/fetch_link_card_service.rb | 8 |
1 files changed, 6 insertions, 2 deletions
diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index 8ddaa2bf4..6ef3abb66 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -1,5 +1,4 @@ # frozen_string_literal: true -require 'nkf' class FetchLinkCardService < BaseService include HttpHelper @@ -86,7 +85,12 @@ class FetchLinkCardService < BaseService return if response.code != 200 || response.mime_type != 'text/html' html = response.to_s - page = Nokogiri::HTML(html, nil, NKF.guess(html).to_s) + + detector = CharlockHolmes::EncodingDetector.new + detector.strip_tags = true + + guess = detector.detect(html, response.charset) + page = Nokogiri::HTML(html, nil, guess&.fetch(:encoding)) card.type = :link card.title = meta_property(page, 'og:title') || page.at_xpath('//title')&.content |