From 007ab330e6ffb1e07995d4e306473d457043e2eb Mon Sep 17 00:00:00 2001 From: nullkal Date: Sun, 9 Jul 2017 05:44:31 +0900 Subject: Use charlock_holmes instead of nkf at FetchLinkCardService (#4080) * Specs for language detection * Use CharlockHolmes instead of NKF * Correct mistakes * Correct style * Set hint_enc instead of falling back and strip_tags * Improve specs * Add dependencies --- app/services/fetch_link_card_service.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'app') diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index 8ddaa2bf4..6ef3abb66 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -1,5 +1,4 @@ # frozen_string_literal: true -require 'nkf' class FetchLinkCardService < BaseService include HttpHelper @@ -86,7 +85,12 @@ class FetchLinkCardService < BaseService return if response.code != 200 || response.mime_type != 'text/html' html = response.to_s - page = Nokogiri::HTML(html, nil, NKF.guess(html).to_s) + + detector = CharlockHolmes::EncodingDetector.new + detector.strip_tags = true + + guess = detector.detect(html, response.charset) + page = Nokogiri::HTML(html, nil, guess&.fetch(:encoding)) card.type = :link card.title = meta_property(page, 'og:title') || page.at_xpath('//title')&.content -- cgit